Compare commits

..

2 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
d27a375d34 docs/CHANGELOG.md: cut v1.78.1 2022-07-08 01:01:17 +03:00
Aliaksandr Valialkin
e323afd6ca lib/storage: put the (date, metricID) entry in dateMetricIDCache just after the corresponding series is registered in the per-day inverted index
Previously the time series could be put into dateMetricIDCache without
registering in the per-day inverted index if GetOrCreateTSIDByName
finds TSID entry in the global index. This could lead to missing
series in query results.

The issue has been introduced in the commit 55e7afae3a,
which has been included in VictoriaMetrics v1.78.0
2022-07-05 15:19:10 +03:00
8399 changed files with 463605 additions and 1828159 deletions

0
.codex
View File

View File

@@ -1,86 +0,0 @@
name: Bug report
description: Create a report to help us improve
labels: [bug]
body:
- type: markdown
attributes:
value: |
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade)
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
and verify whether the bug is reproducible there.
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/victoriametrics/troubleshooting/) first.
- type: textarea
id: describe-the-bug
attributes:
label: Describe the bug
description: |
A clear and concise description of what the bug is.
placeholder: |
When I do `A` VictoriaMetrics does `B`. I expect it to do `C`.
validations:
required: true
- type: textarea
id: to-reproduce
attributes:
label: To Reproduce
description: |
Steps to reproduce the behavior.
If reproducing an issue requires some specific configuration file, please paste it here.
placeholder: |
Steps to reproduce the behavior.
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: |
The line returned when passing `--version` command line flag to the binary. For example:
```
$ ./victoria-metrics-prod --version
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
```
validations:
required: true
- type: textarea
id: logs
attributes:
label: Logs
description: |
Check if any warnings or errors were logged by VictoriaMetrics components
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
validations:
required: false
- type: textarea
id: screenshots
attributes:
label: Screenshots
description: |
If applicable, add screenshots to help explain your problem.
For VictoriaMetrics health-state issues please provide full-length screenshots
of Grafana dashboards if possible:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
See how to setup monitoring here:
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring)
validations:
required: false
- type: textarea
id: flags
attributes:
label: Used command-line flags
description: |
Please provide the command-line flags used for running VictoriaMetrics and its components.
validations:
required: false
- type: textarea
id: additional-info
attributes:
label: Additional information
placeholder: |
Additional information that doesn't fit elsewhere
validations:
required: false

View File

@@ -1,5 +0,0 @@
blank_issues_enabled: true
contact_links:
- name: Ask on Slack
url: https://slack.victoriametrics.com/
about: You can ask for help here!

View File

@@ -1,43 +0,0 @@
name: Feature request
description: Suggest an idea for this project
labels: [enhancement]
body:
- type: textarea
id: describe-the-problem
attributes:
label: Is your feature request related to a problem? Please describe
description: |
A clear and concise description of what the problem is.
placeholder: |
Ex. I'm always frustrated when [...]
validations:
required: false
- type: textarea
id: describe-the-solution
attributes:
label: Describe the solution you'd like
description: |
A clear and concise description of what you want to happen.
validations:
required: true
- type: textarea
id: alternative-solutions
attributes:
label: Describe alternatives you've considered
description: |
A clear and concise description of any alternative solutions or features you've considered.
placeholder: |
I have tried to do `A`, but that doesn't solve a problem completely.
I have tried to do `A` and `B`, but implementing this would be better.
validations:
required: false
- type: textarea
id: feature-additional-info
attributes:
label: Additional information
description: |
Additional information which you consider helpful for implementing this feature.
placeholder: |
Add any other context or screenshots about the feature request here.
validations:
required: false

View File

@@ -1,32 +0,0 @@
name: Question
description: Ask a question regarding VictoriaMetrics or its components
labels: [question]
body:
- type: textarea
id: describe-the-component
attributes:
label: Is your question related to a specific component?
placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations:
required: false
- type: textarea
id: describe-the-question
attributes:
label: Describe the question in detail
description: |
A clear and concise description of the issue and the question.
validations:
required: true
- type: checkboxes
id: troubleshooting
attributes:
label: Troubleshooting docs
description: I am familiar with the following troubleshooting docs
options:
- label: General - https://docs.victoriametrics.com/victoriametrics/troubleshooting/
required: false
- label: vmagent - https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting
required: false
- label: vmalert - https://docs.victoriametrics.com/victoriametrics/vmalert/#troubleshooting
required: false

View File

@@ -4,18 +4,14 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 0
- package-ecosystem: "bundler"
directory: "/docs"
schedule:
interval: "weekly"
open-pull-requests-limit: 0
interval: "daily"
- package-ecosystem: "gomod"
directory: "/app/vmui/packages/vmui/web"
schedule:
@@ -25,8 +21,6 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "npm"
directory: "/app/vmui/packages/vmui"
schedule:

View File

@@ -1,3 +0,0 @@
**PLEASE REMOVE LINE BELOW BEFORE SUBMITTING**
Before creating the PR, make sure you have read and followed the [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
set -e
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
if ! grep -q "^+" diff.txt; then
echo "No additions in CHANGELOG.md"
exit 0
fi
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
exit 1
fi
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi
BAD=0
while IFS= read -r line; do
# Grep exact line inside the file and get line numbers
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
for m in $MATCHES; do
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
BAD=1
fi
done
done << EOF
$ADDED_LINES
EOF
if [ "$BAD" -ne 0 ]; then
echo "CHANGELOG modifications must be placed inside the ## tip section."
exit 1
fi
echo "CHANGELOG modifications are valid."

View File

@@ -1,77 +0,0 @@
name: build
on:
push:
branches:
- cluster
- master
paths:
- '**.go'
- '**/Dockerfile'
- '**/Makefile'
- '!app/vmui/**'
- '.github/workflows/build.yml'
pull_request:
branches:
- cluster
- master
paths:
- '**.go'
- '**/Dockerfile'
- '**/Makefile'
- '!app/vmui/**'
- '.github/workflows/build.yml'
permissions: {}
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
jobs:
build:
name: ${{ matrix.os }}-${{ matrix.arch }}
permissions:
contents: read
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- os: linux
arch: amd64
- os: linux
arch: arm64
- os: linux
arch: arm
- os: linux
arch: ppc64le
- os: linux
arch: 386
- os: linux
arch: s390x
- os: freebsd
arch: amd64
- os: openbsd
arch: amd64
- os: netbsd
arch: amd64
- os: windows
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
- name: Build vmcluster for ${{ matrix.os }}-${{ matrix.arch }}
run: make vmcluster-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,23 +0,0 @@
name: 'changelog-linter'
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
permissions: {}
jobs:
tip-lint:
permissions:
contents: read
runs-on: 'ubuntu-latest'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# needed for proper diff
fetch-depth: 0
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,51 +0,0 @@
name: check-commit-signed
on:
pull_request:
permissions: {}
jobs:
check-commit-signed:
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0 # we need full history for commit verification
- name: Check commit signatures
run: |
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "Not a PR event, skipping signature check"
exit 0
fi
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
echo "Checking commits in PR range: $RANGE"
if [ -z "$(git rev-list $RANGE)" ]; then
echo "No new commits in this PR, skipping signature check"
exit 0
fi
# Check raw commit objects for a "gpgsig" header as a fast early signal for
# contributors. Both GPG and SSH signatures use this header.
# This avoids relying on %G? which returns N for SSH commits.
# This check is not a security enforcement — unsigned commits cannot be merged
# anyway due to the GitHub repository merge policy.
unsigned=""
for sha in $(git rev-list $RANGE); do
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
unsigned="$unsigned $sha"
fi
done
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (GPG or SSH)"

View File

@@ -6,37 +6,21 @@ on:
pull_request:
paths:
- 'vendor'
permissions: {}
permissions:
contents: read
jobs:
build:
name: Build
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.17
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: 'go.mod'
cache: false
- run: go version
- name: Cache Go artifacts
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/go-build
~/go/pkg/mod
~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
- name: Code checkout
uses: actions/checkout@master
- name: Check License
run: make check-licenses
run: |
make check-licenses

View File

@@ -1,65 +0,0 @@
name: 'CodeQL Go'
on:
push:
branches:
- cluster
- master
paths:
- '**.go'
pull_request:
branches:
- cluster
- master
paths:
- '**.go'
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
permissions: {}
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache: false
go-version-file: 'go.mod'
- run: go version
- name: Cache Go artifacts
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/go-build
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
- name: Initialize CodeQL
uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
with:
languages: go
- name: Autobuild
uses: github/codeql-action/autobuild@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
with:
category: 'language:go'

70
.github/workflows/codeql-analysis.yml vendored Normal file
View File

@@ -0,0 +1,70 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ master, cluster ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master, cluster ]
schedule:
- cron: '30 18 * * 2'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'go', 'javascript' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

View File

@@ -1,59 +0,0 @@
name: publish-docs
on:
push:
branches:
- 'master'
paths:
- 'docs/**'
- '.github/workflows/docs.yaml'
workflow_dispatch: {}
permissions: {}
jobs:
build:
name: Build
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}
path: __vm-docs
- name: Import GPG key
uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0
id: import-gpg
with:
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
passphrase: ${{ secrets.VM_BOT_PASSPHRASE }}
git_user_signingkey: true
git_commit_gpgsign: true
git_config_global: true
- name: Copy docs
id: update
run: |
find docs -type d -maxdepth 1 -mindepth 1 -exec \
sh -c 'rsync -zarvh --delete {}/ ../__vm-docs/content/$(basename {})/' \;
echo "SHORT_SHA=$(git rev-parse --short $GITHUB_SHA)" >> $GITHUB_OUTPUT
working-directory: __vm
- name: Push to vmdocs
run: |
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
if [[ -n $(git status --porcelain) ]]; then
git add .
git commit -S -m "sync docs with VictoriaMetrics/VictoriaMetrics commit: ${{ steps.update.outputs.SHORT_SHA }}"
git push
fi
working-directory: __vm-docs

52
.github/workflows/main.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
name: main
on:
push:
paths-ignore:
- 'docs/**'
- '**.md'
pull_request:
paths-ignore:
- 'docs/**'
- '**.md'
permissions:
contents: read
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.17
id: go
- name: Code checkout
uses: actions/checkout@master
- name: Dependencies
run: |
make install-golint
make install-errcheck
make install-golangci-lint
- name: Build
env:
GO111MODULE: on
run: |
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
make check-all
git diff --exit-code
make test-full
make test-pure
make test-full-386
make vminsert vmselect vmstorage
make vminsert-pure vmselect-pure vmstorage-pure
make vmutils
GOOS=freebsd go build -mod=vendor ./app/vminsert
GOOS=freebsd go build -mod=vendor ./app/vmselect
GOOS=freebsd go build -mod=vendor ./app/vmstorage
GOOS=openbsd go build -mod=vendor ./app/vminsert
GOOS=openbsd go build -mod=vendor ./app/vmselect
GOOS=openbsd go build -mod=vendor ./app/vmstorage
GOOS=darwin go build -mod=vendor ./app/vminsert
GOOS=darwin go build -mod=vendor ./app/vmselect
GOOS=darwin go build -mod=vendor ./app/vmstorage

View File

@@ -1,116 +0,0 @@
name: test
on:
push:
branches:
- cluster
- master
paths:
- '**.go'
- 'go.*'
- '.github/workflows/main.yml'
pull_request:
branches:
- cluster
- master
paths:
- '**.go'
- 'go.*'
- '.github/workflows/main.yml'
permissions: {}
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
jobs:
lint:
name: lint
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
- name: Cache golangci-lint
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/golangci-lint
~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all
run: |
make check-all
git diff --exit-code
unit:
name: unit
permissions:
contents: read
runs-on: ubuntu-latest
strategy:
matrix:
scenario:
- 'test'
- 'test-386'
- 'test-pure'
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
- name: Run tests
run: make ${{ matrix.scenario}}
apptest:
name: apptest
permissions:
contents: read
runs-on: apptest
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
- name: Run app tests
run: make apptest

View File

@@ -1,88 +0,0 @@
name: vmui
on:
push:
branches:
- cluster
- master
paths:
- 'app/vmui/packages/vmui/**'
- '.github/workflows/vmui.yml'
pull_request:
branches:
- cluster
- master
paths:
- 'app/vmui/packages/vmui/**'
- '.github/workflows/vmui.yml'
permissions: {}
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
jobs:
vmui-checks:
name: VMUI Checks (lint, test, typecheck)
permissions:
checks: write
contents: read
pull-requests: read
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Cache node_modules
id: cache
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: app/vmui/packages/vmui/node_modules
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
- name: Install dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: make vmui-install
- name: Run lint
id: lint
run: make vmui-lint
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests
id: test
run: make vmui-test
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck
id: typecheck
run: make vmui-typecheck
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@d57a1193d4c59cbfbf3f86c271f42612f9dbd9e9 # 3.0.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
report-json: app/vmui/packages/vmui/vmui-lint-report.json
- name: Check overall status
run: |
echo "Lint status: ${{ steps.lint.outcome }}"
echo "Test status: ${{ steps.test.outcome }}"
echo "Typecheck status: ${{ steps.typecheck.outcome }}"
if [[ "${{ steps.lint.outcome }}" == "failure" || "${{ steps.test.outcome }}" == "failure" || "${{ steps.typecheck.outcome }}" == "failure" ]]; then
echo "One or more checks failed"
exit 1
else
echo "All checks passed"
fi

22
.gitignore vendored
View File

@@ -1,4 +1,3 @@
/tmp
/tags
/pkg
*.pprof
@@ -7,25 +6,18 @@
.vscode
*.test
*.swp
/vmdocs
/gocache-for-docker
/victoria-logs-data
/victoria-metrics-data
/vmagent-remotewrite-data
/vlagent-remotewritewrite
/vmstorage-data
/vmselect-cache
/package/temp-deb-*
/package/temp-rpm-*
/package/*.deb
/package/*.rpm
.DS_store
.DS_Store
### terraform
terraform.tfstate
terraform.tfstate.*
.terraform/
Gemfile.lock
/_site
_site
*.tmp
/docs/.jekyll-metadata
coverage.txt
cspell.json
*~
deployment/docker/provisioning/plugins/

View File

@@ -1,29 +0,0 @@
version: "2"
linters:
settings:
errcheck:
exclude-functions:
- fmt.Fprintf
- fmt.Fprint
- (net/http.ResponseWriter).Write
exclusions:
generated: lax
presets:
- common-false-positives
- legacy
- std-error-handling
rules:
- linters:
- staticcheck
text: 'SA(4003|1019|5011):'
paths:
- third_party$
- builtin$
- examples$
formatters:
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$

View File

@@ -3,5 +3,3 @@ allowlist:
- MIT
- BSD-3-Clause
- BSD-2-Clause
- ISC
- MPL-2.0

120
CODE_OF_CONDUCT_RU.md Normal file
View File

@@ -0,0 +1,120 @@
# Кодекс Поведения участника
## Наши обязательства
Мы, как участники, авторы и лидеры обязуемся сделать участие в сообществе
свободным от притеснений для всех, независимо от возраста, телосложения,
видимых или невидимых ограничений способности, этнической принадлежности,
половых признаков, гендерной идентичности и выражения, уровня опыта,
образования, социо-экономического статуса, национальности, внешности,
расы, религии, или сексуальной идентичности и ориентации.
Мы обещаем действовать и взаимодействовать таким образом, чтобы вносить вклад в открытое,
дружелюбное, многообразное, инклюзивное и здоровое сообщество.
## Наши стандарты
Примеры поведения, создающие условия для благоприятных взаимоотношений включают в себя:
* Проявление доброты и эмпатии к другим участникам проекта
* Уважение к чужой точке зрения и опыту
* Конструктивная критика и принятие конструктивной критики
* Принятие ответственности, принесение извинений тем, кто пострадал от наших ошибок
и извлечение уроков из опыта
* Ориентирование на то, что лучше подходит для сообщества, а не только для нас лично
Примеры неприемлемого поведения участников включают в себя:
* Использование выражений или изображений сексуального характера и нежелательное сексуальное внимание или домогательство в любой форме
* Троллинг, оскорбительные или уничижительные комментарии, переход на личности или затрагивание политических убеждений
* Публичное или приватное домогательство
* Публикация личной информации других лиц, например, физического или электронного адреса, без явного разрешения
* Иное поведение, которое обоснованно считать неуместным в профессиональной обстановке
## Обязанности
Лидеры сообщества отвечают за разъяснение и применение наших стандартов приемлемого
поведения и будут предпринимать соответствующие и честные меры по исправлению положения
в ответ на любое поведение, которое они сочтут неприемлемым, угрожающим, оскорбительным или вредным.
Лидеры сообщества обладают правом и обязанностью удалять, редактировать или отклонять
комментарии, коммиты, код, изменения в вики, вопросы и другой вклад, который не совпадает
с Кодексом Поведения, и предоставят причины принятого решения, когда сочтут нужным.
## Область применения
Данный Кодекс Поведения применим во всех во всех публичных физических и цифровых пространства сообщества,
а также когда человек официально представляет сообщество в публичных местах.
Примеры представления проекта или сообщества включают использование официальной электронной почты,
публикации в официальном аккаунте в социальных сетях,
или упоминания как представителя в онлайн или оффлайн мероприятии.
## Приведение в исполнение
О случаях домогательства, а так же оскорбительного или иного другого неприемлемого
поведения можно сообщить ответственным лидерам сообщества с помощью письма на info@victoriametrics.com
Все жалобы будут рассмотрены и расследованы оперативно и беспристрастно.
Все лидеры сообщества обязаны уважать неприкосновенность частной жизни и личную
неприкосновенность автора сообщения.
## Руководство по исполнению
Лидеры сообщества будут следовать следующим Принципам Воздействия в Сообществе,
чтобы определить последствия для тех, кого они считают виновными в нарушении данного Кодекса Поведения:
### 1. Исправление
**Общественное влияние**: Использование недопустимой лексики или другое поведение,
считающиеся непрофессиональным или нежелательным в сообществе.
**Последствия**: Личное, письменное предупреждение от лидеров сообщества,
объясняющее суть нарушения и почему такое поведение
было неуместно. Лидеры сообщества могут попросить принести публичное извинение.
### 2. Предупреждение
**Общественное влияние**: Нарушение в результате одного инцидента или серии действий.
**Последствия**: Предупреждение о последствиях в случае продолжающегося неуместного поведения.
На определенное время не допускается взаимодействие с людьми, вовлеченными в инцидент,
включая незапрошенное взаимодействие
с теми, кто обеспечивает соблюдение Кодекса. Это включает в себя избегание взаимодействия
в публичных пространствах, а так же во внешних каналах,
таких как социальные сети. Нарушение этих правил влечет за собой временный или вечный бан.
### 3. Временный бан
**Общественное влияние**: Серьёзное нарушение стандартов сообщества,
включая продолжительное неуместное поведение.
**Последствия**: Временный запрет (бан) на любое взаимодействие
или публичное общение с сообществом на определенный период времени.
На этот период не допускается публичное или личное взаимодействие с людьми,
вовлеченными в инцидент, включая незапрошенное взаимодействие
с теми, кто обеспечивает соблюдение Кодекса.
Нарушение этих правил влечет за собой вечный бан.
### 4. Вечный бан
**Общественное влияние**: Демонстрация систематических нарушений стандартов сообщества,
включая продолжающееся неуместное поведение, домогательство до отдельных лиц,
или проявление агрессии либо пренебрежительного отношения к категориям лиц.
**Последствия**: Вечный запрет на любое публичное взаимодействие с сообществом.
## Атрибуция
Данный Кодекс Поведения основан на [Кодекс Поведения участника][homepage],
версии 2.0, доступной по адресу
<https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.
Принципы Воздействия в Сообществе были вдохновлены [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
Ответы на общие вопросы о данном кодексе поведения ищите на странице FAQ:
<https://www.contributor-covenant.org/faq>. Переводы доступны по адресу
<https://www.contributor-covenant.org/translations>.

View File

@@ -1 +1,16 @@
The document has been moved [here](https://docs.victoriametrics.com/victoriametrics/contributing/).
If you like VictoriaMetrics and want to contribute, then we need the following:
- Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
- Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
- Updating documentation.
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
- Prefer simple code and architecture.
- Avoid complex abstractions.
- Avoid magic code and fancy algorithms.
- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
- Minimize the number of moving parts in the distributed system.
- Avoid automated decisions, which may hurt cluster availability, consistency or performance.
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc.
Copyright 2019-2022 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

374
Makefile
View File

@@ -1,34 +1,18 @@
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
MAKE_CONCURRENCY ?= $(shell getconf _NPROCESSORS_ONLN)
MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
ifeq ($(PKG_TAG),)
PKG_TAG := $(BUILDINFO_TAG)
endif
EXTRA_DOCKER_TAG_SUFFIX ?=
EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0
.PHONY: $(MAKECMDGOALS)
include app/*/Makefile
include codespell/Makefile
include docs/Makefile
include deployment/*/Makefile
include dashboards/Makefile
include package/release/Makefile
include benchmarks/Makefile
all: \
vminsert \
vmselect \
@@ -39,77 +23,12 @@ all-pure: \
vmselect-pure \
vmstorage-pure
include app/*/Makefile
include deployment/*/Makefile
clean:
rm -rf bin/*
vmcluster-linux-amd64: \
vminsert-linux-amd64 \
vmselect-linux-amd64 \
vmstorage-linux-amd64
vmcluster-linux-arm64: \
vminsert-linux-arm64 \
vmselect-linux-arm64 \
vmstorage-linux-arm64
vmcluster-linux-arm: \
vminsert-linux-arm \
vmselect-linux-arm \
vmstorage-linux-arm
vmcluster-linux-ppc64le: \
vminsert-linux-ppc64le \
vmselect-linux-ppc64le \
vmstorage-linux-ppc64le
vmcluster-linux-386: \
vminsert-linux-386 \
vmselect-linux-386 \
vmstorage-linux-386
vmcluster-linux-s390x: \
vminsert-linux-s390x \
vmselect-linux-s390x \
vmstorage-linux-s390x
vmcluster-freebsd-amd64: \
vminsert-freebsd-amd64 \
vmselect-freebsd-amd64 \
vmstorage-freebsd-amd64
vmcluster-openbsd-amd64: \
vminsert-openbsd-amd64 \
vmselect-openbsd-amd64 \
vmstorage-openbsd-amd64
vmcluster-windows-amd64: \
vminsert-windows-amd64 \
vmselect-windows-amd64 \
vmstorage-windows-amd64
vmcluster-darwin-amd64: \
vminsert-darwin-amd64 \
vmselect-darwin-amd64 \
vmstorage-darwin-amd64
vmcluster-darwin-arm64: \
vminsert-darwin-arm64 \
vmselect-darwin-arm64 \
vmstorage-darwin-arm64
# When adding a new crossbuild target, please also add it to the .github/workflows/build.yml
crossbuild: vmcluster-crossbuild
# When adding a new crossbuild target, please also add it to the .github/workflows/build.yml
vmcluster-crossbuild:
$(MAKE_PARALLEL) vmcluster-linux-amd64 \
vmcluster-linux-arm64 \
vmcluster-linux-arm \
vmcluster-linux-ppc64le \
vmcluster-linux-386 \
vmcluster-freebsd-amd64 \
vmcluster-openbsd-amd64
publish: \
publish-vminsert \
publish-vmselect \
@@ -120,246 +39,149 @@ package: \
package-vmselect \
package-vmstorage
publish-final-images:
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-from-rc && \
PKG_TAG=$(TAG) $(MAKE) publish-latest
publish-latest:
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-latest && \
PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-latest
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-latest
publish-release:
rm -rf bin/*
git checkout $(TAG) && $(MAKE) release && $(MAKE) publish && \
git checkout $(TAG)-cluster && $(MAKE) release && $(MAKE) publish && \
git checkout $(TAG)-enterprise && $(MAKE) release && $(MAKE) publish && \
git checkout $(TAG)-enterprise-cluster && $(MAKE) release && $(MAKE) publish
git checkout $(TAG) && $(MAKE) release publish && \
git checkout $(TAG)-cluster && $(MAKE) release publish && \
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish
release:
$(MAKE_PARALLEL) release-vmcluster
release: \
release-vmcluster
release-vmcluster: \
release-vmcluster-linux-amd64 \
release-vmcluster-linux-arm64 \
release-vmcluster-linux-s390x \
release-vmcluster-freebsd-amd64 \
release-vmcluster-openbsd-amd64 \
release-vmcluster-windows-amd64 \
release-vmcluster-darwin-amd64 \
release-vmcluster-darwin-arm64
release-vmcluster-amd64 \
release-vmcluster-arm64
release-vmcluster-linux-amd64:
GOOS=linux GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-amd64:
GOARCH=amd64 $(MAKE) release-vmcluster-generic
release-vmcluster-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-arm64:
GOARCH=arm64 $(MAKE) release-vmcluster-generic
release-vmcluster-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-freebsd-amd64:
GOOS=freebsd GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-openbsd-amd64:
GOOS=openbsd GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-windows-amd64:
GOARCH=amd64 $(MAKE) release-vmcluster-windows-goarch
release-vmcluster-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-darwin-arm64:
GOOS=darwin GOARCH=arm64 $(MAKE) release-vmcluster-goos-goarch
release-vmcluster-goos-goarch: \
vminsert-$(GOOS)-$(GOARCH)-prod \
vmselect-$(GOOS)-$(GOARCH)-prod \
vmstorage-$(GOOS)-$(GOARCH)-prod
release-vmcluster-generic: \
vminsert-$(GOARCH)-prod \
vmselect-$(GOARCH)-prod \
vmstorage-$(GOARCH)-prod
cd bin && \
tar $(TAR_OWNERSHIP) --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vminsert-$(GOOS)-$(GOARCH)-prod \
vmselect-$(GOOS)-$(GOARCH)-prod \
vmstorage-$(GOOS)-$(GOARCH)-prod \
&& sha256sum victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vminsert-$(GOOS)-$(GOARCH)-prod \
vmselect-$(GOOS)-$(GOARCH)-prod \
vmstorage-$(GOOS)-$(GOARCH)-prod \
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vminsert-$(GOOS)-$(GOARCH)-prod \
vmselect-$(GOOS)-$(GOARCH)-prod \
vmstorage-$(GOOS)-$(GOARCH)-prod
release-vmcluster-windows-goarch: \
vminsert-windows-$(GOARCH)-prod \
vmselect-windows-$(GOARCH)-prod \
vmstorage-windows-$(GOARCH)-prod
cd bin && \
zip victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
vminsert-windows-$(GOARCH)-prod.exe \
vmselect-windows-$(GOARCH)-prod.exe \
vmstorage-windows-$(GOARCH)-prod.exe \
&& sha256sum victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
vminsert-windows-$(GOARCH)-prod.exe \
vmselect-windows-$(GOARCH)-prod.exe \
vmstorage-windows-$(GOARCH)-prod.exe \
> victoria-metrics-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vminsert-windows-$(GOARCH)-prod.exe \
vmselect-windows-$(GOARCH)-prod.exe \
vmstorage-windows-$(GOARCH)-prod.exe
tar --transform="flags=r;s|-$(GOARCH)||" -czf victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
vminsert-$(GOARCH)-prod \
vmselect-$(GOARCH)-prod \
vmstorage-$(GOARCH)-prod \
&& sha256sum victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
vminsert-$(GOARCH)-prod \
vmselect-$(GOARCH)-prod \
vmstorage-$(GOARCH)-prod \
| sed s/-$(GOARCH)-prod/-prod/ > victoria-metrics-$(GOARCH)-$(PKG_TAG)_checksums.txt
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
gofmt -l -w -s ./lib
gofmt -l -w -s ./app
gofmt -l -w -s ./apptest
GO111MODULE=on gofmt -l -w -s ./lib
GO111MODULE=on gofmt -l -w -s ./app
vet:
go vet -tags 'synctest' ./lib/...
go vet ./app/...
go vet ./apptest/...
GO111MODULE=on go vet -mod=vendor ./lib/...
GO111MODULE=on go vet -mod=vendor ./app/...
check-all: fmt vet golangci-lint govulncheck
lint: install-golint
golint lib/...
golint app/...
clean-checkers: remove-golangci-lint remove-govulncheck
install-golint:
which golint || GO111MODULE=off go get golang.org/x/lint/golint
errcheck: install-errcheck
errcheck -exclude=errcheck_excludes.txt ./lib/...
errcheck -exclude=errcheck_excludes.txt ./app/vminsert/...
errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
errcheck -exclude=errcheck_excludes.txt ./app/vmauth/...
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
errcheck -exclude=errcheck_excludes.txt ./app/vmctl/...
install-errcheck:
which errcheck || GO111MODULE=off go get github.com/kisielk/errcheck
check-all: fmt vet lint errcheck golangci-lint
test:
go test -tags 'synctest' ./lib/... ./app/...
GO111MODULE=on go test -mod=vendor ./lib/... ./app/...
test-race:
go test -tags 'synctest' -race ./lib/... ./app/...
test-386:
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
GO111MODULE=on go test -mod=vendor -race ./lib/... ./app/...
test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor ./lib/... ./app/...
test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GO111MODULE=on go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
apptest:
$(MAKE) vminsert-race vmselect-race vmstorage-race vmagent-race vmctl-race vmbackup-race vmrestore-race
go test ./apptest/... -skip="^Test(Single|Legacy).*"
apptest-legacy: vminsert-race vmselect-race vmstorage-race vmbackup-race vmrestore-race
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VMSINGLE_V1_132_0_PATH=$${DIR}/victoria-metrics-prod \
VMSTORAGE_V1_132_0_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacyCluster.*"
GO111MODULE=on GOARCH=386 go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
benchmark:
go test -run=NO_TESTS -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/...
GO111MODULE=on go test -mod=vendor -bench=. ./lib/...
GO111MODULE=on go test -mod=vendor -bench=. ./app/...
benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./lib/...
GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./app/...
vendor-update:
go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.26
go mod vendor
GO111MODULE=on go get -u -d ./lib/...
GO111MODULE=on go get -u -d ./app/...
GO111MODULE=on go mod tidy -compat=1.17
GO111MODULE=on go mod vendor
app-local:
CGO_ENABLED=1 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
CGO_ENABLED=1 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-pure:
CGO_ENABLED=0 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-goos-goarch:
CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-$(GOOS)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-with-goarch:
GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
app-local-windows-with-goarch:
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc -dir=lib
qtc -dir=app
qtc
install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
which qtc || GO111MODULE=off go get github.com/valyala/quicktemplate/qtc
golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest'
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
remove-golangci-lint:
rm -rf `which golangci-lint`
govulncheck: install-govulncheck
govulncheck ./...
govulncheck-docker:
docker run -w $(PWD) -v $(PWD):$(PWD) \
-v govulncheck-gomod-cache:/root/go/pkg/mod \
-v govulncheck-gobuild-cache:/root/.cache/go-build \
-v govulncheck-go-bin:/root/go/bin \
--env="GOCACHE=/root/.cache/go-build" \
--env="GOMODCACHE=/root/go/pkg/mod" \
"$(GO_BUILDER_IMAGE)" /bin/sh -c "which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest && govulncheck ./..."
install-govulncheck:
which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest
remove-govulncheck:
rm -rf `which govulncheck`
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.46.2
install-wwhrd:
which wwhrd || go install github.com/frapposelli/wwhrd@latest
which wwhrd || GO111MODULE=off go get github.com/frapposelli/wwhrd
check-licenses: install-wwhrd
wwhrd check -f .wwhrd.yml
copy-docs:
echo "---\nsort: ${ORDER}\n---\n" > ${DST}
cat ${SRC} >> ${DST}
# Copies docs for all components and adds the order tag.
# Cluster docs are supposed to be ordered as 9th.
# For The rest of docs is ordered manually.t
docs-sync:
SRC=README.md DST=docs/Cluster-VictoriaMetrics.md ORDER=2 $(MAKE) copy-docs
SRC=app/vmagent/README.md DST=docs/vmagent.md ORDER=3 $(MAKE) copy-docs
SRC=app/vmalert/README.md DST=docs/vmalert.md ORDER=4 $(MAKE) copy-docs
SRC=app/vmauth/README.md DST=docs/vmauth.md ORDER=5 $(MAKE) copy-docs
SRC=app/vmbackup/README.md DST=docs/vmbackup.md ORDER=6 $(MAKE) copy-docs
SRC=app/vmrestore/README.md DST=docs/vmrestore.md ORDER=7 $(MAKE) copy-docs
SRC=app/vmctl/README.md DST=docs/vmctl.md ORDER=8 $(MAKE) copy-docs
SRC=app/vmgateway/README.md DST=docs/vmgateway.md ORDER=9 $(MAKE) copy-docs
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md ORDER=10 $(MAKE) copy-docs

1053
README.md

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +0,0 @@
# Security Policy
You can find out about our security policy and VictoriaMetrics version support on the [security page](https://docs.victoriametrics.com/victoriametrics/#security) in the documentation.

Binary file not shown.

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -1 +0,0 @@
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).

View File

@@ -12,23 +12,20 @@ vmagent-prod:
vmagent-pure-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-pure
vmagent-linux-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-amd64
vmagent-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-amd64
vmagent-linux-arm-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-arm
vmagent-arm-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-arm
vmagent-linux-arm64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-arm64
vmagent-arm64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-arm64
vmagent-linux-ppc64le-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-ppc64le
vmagent-ppc64le-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-ppc64le
vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-386
vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
@@ -36,12 +33,6 @@ vmagent-darwin-amd64-prod:
vmagent-darwin-arm64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-arm64
vmagent-freebsd-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-freebsd-amd64
vmagent-openbsd-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-openbsd-amd64
vmagent-windows-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-windows-amd64
@@ -76,41 +67,26 @@ run-vmagent:
APP_NAME=vmagent \
$(MAKE) run-via-docker
vmagent-linux-amd64:
APP_NAME=vmagent CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmagent-amd64:
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmagent-local-with-goarch
vmagent-linux-arm:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
vmagent-arm:
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmagent-local-with-goarch
vmagent-linux-arm64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmagent-arm64:
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmagent-local-with-goarch
vmagent-linux-ppc64le:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
vmagent-ppc64le:
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmagent-local-with-goarch
vmagent-linux-s390x:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
vmagent-386:
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmagent-local-with-goarch
vmagent-linux-loong64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch
vmagent-linux-386:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
vmagent-darwin-amd64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmagent-darwin-arm64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmagent-freebsd-amd64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmagent-openbsd-amd64:
APP_NAME=vmagent CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmagent-windows-amd64:
GOARCH=amd64 APP_NAME=vmagent $(MAKE) app-local-windows-goarch
vmagent-local-with-goarch:
APP_NAME=vmagent $(MAKE) app-local-with-goarch
vmagent-pure:
APP_NAME=vmagent $(MAKE) app-local-pure
vmagent-windows-amd64:
GOARCH=amd64 APP_NAME=vmagent $(MAKE) app-local-windows-with-goarch

File diff suppressed because it is too large Load Diff

View File

@@ -3,27 +3,31 @@ package common
import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
// PushCtx is a context used for populating WriteRequest.
type PushCtx struct {
// WriteRequest contains the WriteRequest, which must be pushed later to remote storage.
//
// The actual labels and samples for the time series are stored in Labels and Samples fields.
WriteRequest prompb.WriteRequest
WriteRequest prompbmarshal.WriteRequest
// Labels contains flat list of all the labels used in WriteRequest.
Labels []prompb.Label
Labels []prompbmarshal.Label
// Samples contains flat list of all the samples used in WriteRequest.
Samples []prompb.Sample
Samples []prompbmarshal.Sample
}
// Reset resets ctx.
func (ctx *PushCtx) Reset() {
ctx.WriteRequest.Reset()
tss := ctx.WriteRequest.Timeseries
for i := range tss {
ts := &tss[i]
ts.Labels = nil
ts.Samples = nil
}
ctx.WriteRequest.Timeseries = ctx.WriteRequest.Timeseries[:0]
promrelabel.CleanLabels(ctx.Labels)
ctx.Labels = ctx.Labels[:0]
@@ -35,10 +39,15 @@ func (ctx *PushCtx) Reset() {
//
// Call PutPushCtx when the ctx is no longer needed.
func GetPushCtx() *PushCtx {
if v := pushCtxPool.Get(); v != nil {
return v.(*PushCtx)
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*PushCtx)
}
return &PushCtx{}
}
return &PushCtx{}
}
// PutPushCtx returns ctx to the pool.
@@ -46,7 +55,12 @@ func GetPushCtx() *PushCtx {
// ctx mustn't be used after returning to the pool.
func PutPushCtx(ctx *PushCtx) {
ctx.Reset()
pushCtxPool.Put(ctx)
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *PushCtx, cgroup.AvailableCPUs())

View File

@@ -6,11 +6,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -22,16 +22,18 @@ var (
// InsertHandler processes csv data from req.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
return stream.Parse(req, func(rows []csvimport.Row) error {
return insertRows(at, rows, extraLabels)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
})
}
func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label) error {
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -41,23 +43,23 @@ func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
labels = append(labels, extraLabels...)
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
@@ -65,9 +67,7 @@ func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
rowsInserted.Add(len(rows))
if at != nil {
rowsTenantInserted.Get(at).Add(len(rows))

View File

@@ -0,0 +1,99 @@
package datadog
import (
"fmt"
"net/http"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
)
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
//
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
ce := req.Header.Get("Content-Encoding")
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
return insertRows(at, series, extraLabels)
})
})
}
func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range series {
ss := &series[i]
rowsTotal += len(ss.Points)
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: ss.Metric,
})
labels = append(labels, prompbmarshal.Label{
Name: "host",
Value: ss.Host,
})
for _, tag := range ss.Tags {
n := strings.IndexByte(tag, ':')
if n < 0 {
return fmt.Errorf("cannot find ':' in tag %q", tag)
}
name := tag[:n]
value := tag[n+1:]
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompbmarshal.Label{
Name: name,
Value: value,
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
for _, pt := range ss.Points {
samples = append(samples, prompbmarshal.Sample{
Timestamp: pt.Timestamp(),
Value: pt.Value(),
})
}
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -1,97 +0,0 @@
package datadogsketches
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogsketches"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogsketches/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogsketches"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogsketches"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogsketches"}`)
)
// InsertHandlerForHTTP processes remote write for DataDog POST /api/beta/sketches request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
ce := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, ce, func(sketches []*datadogsketches.Sketch) error {
return insertRows(at, sketches, extraLabels)
})
}
func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for _, sketch := range sketches {
ms := sketch.ToSummary()
for _, m := range ms {
labelsLen := len(labels)
labels = append(labels, prompb.Label{
Name: "__name__",
Value: m.Name,
})
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
labels = append(labels, prompb.Label{
Name: "host",
Value: sketch.Host,
})
for _, label := range m.Labels {
labels = append(labels, prompb.Label{
Name: label.Name,
Value: label.Value,
})
}
for _, tag := range sketch.Tags {
name, value := datadogutil.SplitTag(tag)
labels = append(labels, prompb.Label{
Name: name,
Value: value,
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
for _, p := range m.Points {
samples = append(samples, prompb.Sample{
Timestamp: p.Timestamp,
Value: p.Value,
})
}
rowsTotal += len(m.Points)
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -1,99 +0,0 @@
package datadogv1
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv1"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv1"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv1"}`)
)
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
ce := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, ce, func(series []datadogv1.Series) error {
return insertRows(at, series, extraLabels)
})
}
func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range series {
ss := &series[i]
rowsTotal += len(ss.Points)
labelsLen := len(labels)
labels = append(labels, prompb.Label{
Name: "__name__",
Value: ss.Metric,
})
if ss.Host != "" {
labels = append(labels, prompb.Label{
Name: "host",
Value: ss.Host,
})
}
if ss.Device != "" {
labels = append(labels, prompb.Label{
Name: "device",
Value: ss.Device,
})
}
for _, tag := range ss.Tags {
name, value := datadogutil.SplitTag(tag)
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompb.Label{
Name: name,
Value: value,
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
for _, pt := range ss.Points {
samples = append(samples, prompb.Sample{
Timestamp: pt.Timestamp(),
Value: pt.Value(),
})
}
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -1,102 +0,0 @@
package datadogv2
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv2"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv2"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv2"}`)
)
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v2/series request.
//
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
ct := req.Header.Get("Content-Type")
ce := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, ce, ct, func(series []datadogv2.Series) error {
return insertRows(at, series, extraLabels)
})
}
func insertRows(at *auth.Token, series []datadogv2.Series, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range series {
ss := &series[i]
rowsTotal += len(ss.Points)
labelsLen := len(labels)
labels = append(labels, prompb.Label{
Name: "__name__",
Value: ss.Metric,
})
for _, rs := range ss.Resources {
labels = append(labels, prompb.Label{
Name: rs.Type,
Value: rs.Name,
})
}
if ss.SourceTypeName != "" {
labels = append(labels, prompb.Label{
Name: "source_type_name",
Value: ss.SourceTypeName,
})
}
for _, tag := range ss.Tags {
name, value := datadogutil.SplitTag(tag)
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompb.Label{
Name: name,
Value: value,
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
for _, pt := range ss.Points {
samples = append(samples, prompb.Sample{
Timestamp: pt.Timestamp * 1000,
Value: pt.Value,
})
}
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -1,8 +1,8 @@
ARG base_image=non-existing
ARG base_image
FROM $base_image
EXPOSE 8429
ENTRYPOINT ["/vmagent-prod"]
ARG src_binary=non-existing
ARG src_binary
COPY $src_binary ./vmagent-prod

View File

@@ -5,10 +5,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -21,12 +20,12 @@ var (
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
func InsertHandler(r io.Reader) error {
return stream.Parse(r, "", func(rows []parser.Row) error {
return insertRows(nil, rows)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertRows(at *auth.Token, rows []parser.Row) error {
func insertRows(rows []parser.Row) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -36,22 +35,22 @@ func insertRows(at *auth.Token, rows []parser.Row) error {
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
@@ -59,9 +58,7 @@ func insertRows(at *auth.Token, rows []parser.Row) error {
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil

View File

@@ -10,18 +10,19 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field")
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
dbLabel = flag.String("influxDBLabel", "db", "Default label for the DB name sent over '?db={db_name}' query parameter")
)
@@ -35,9 +36,11 @@ var (
// InsertHandlerForReader processes remote write for influx line protocol.
//
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.Parse(r, encoding, true, "", "", func(db string, rows []influx.Row) error {
return insertRows(at, db, rows, nil)
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
return insertRows(nil, db, rows, nil)
})
})
}
@@ -45,22 +48,23 @@ func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
encoding := req.Header.Get("Content-Encoding")
isStreamMode := req.Header.Get("Stream-Mode") == "1"
return stream.Parse(req.Body, encoding, isStreamMode, precision, db, func(db string, rows []influx.Row) error {
return insertRows(at, db, rows, extraLabels)
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
return insertRows(at, db, rows, extraLabels)
})
})
}
func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prompb.Label) error {
func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
@@ -80,13 +84,13 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
if tag.Key == *dbLabel {
hasDBKey = true
}
commonLabels = append(commonLabels, prompb.Label{
commonLabels = append(commonLabels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
if len(db) > 0 && !hasDBKey {
commonLabels = append(commonLabels, prompb.Label{
commonLabels = append(commonLabels, prompbmarshal.Label{
Name: *dbLabel,
Value: db,
})
@@ -110,16 +114,16 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
}
metricGroup := bytesutil.ToUnsafeString(buf[bufLen:])
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: metricGroup,
})
labels = append(labels, commonLabels...)
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Timestamp: r.Timestamp,
Value: f.Value,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
@@ -130,9 +134,7 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
ctx.ctx.Labels = labels
ctx.ctx.Samples = samples
ctx.commonLabels = commonLabels
if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
@@ -144,7 +146,7 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
type pushCtx struct {
ctx common.PushCtx
commonLabels []prompb.Label
commonLabels []prompbmarshal.Label
metricGroupBuf []byte
buf []byte
}
@@ -160,15 +162,25 @@ func (ctx *pushCtx) reset() {
}
func getPushCtx() *pushCtx {
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
return &pushCtx{}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
pushCtxPool.Put(ctx)
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, cgroup.AvailableCPUs())

View File

@@ -8,34 +8,27 @@ import (
"net/http"
"os"
"strings"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogsketches"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv1"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/newrelic"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentelemetry"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/prometheusimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/influxutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/influxutils"
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
@@ -43,49 +36,26 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "TCP address to listen for incoming http requests. "+
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. "+
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . "+
"See also -influxListenAddr.useProxyProtocol")
influxUseProxyProtocol = flag.Bool("influxListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -influxListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. "+
"See also -graphiteListenAddr.useProxyProtocol")
graphiteUseProxyProtocol = flag.Bool("graphiteListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -graphiteListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB metrics. "+
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol")
opentsdbUseProxyProtocol = flag.Bool("opentsdbListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented")
maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented")
maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented")
enableMultitenancyViaHeaders = flag.Bool("enableMultitenancyViaHeaders", false, "Enables multitenancy via HTTP headers. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy")
)
var (
@@ -102,15 +72,6 @@ var (
)
func main() {
// vmagent is optimized for reduced memory allocations,
// so it can run with the reduced GOGC in order to reduce the used memory,
// while keeping CPU usage spent in GC at low levels.
//
// Some workloads may need increased GOGC values. Then such values can be set via GOGC environment variable.
// It is recommended increasing GOGC if go_memstats_gc_cpu_fraction metric exposed at /metrics page
// exceeds 0.05 for extended periods of time.
cgroup.SetGOGC(50)
// Write flags and help message to stdout, since it is easier to grep or pipe.
flag.CommandLine.SetOutput(os.Stdout)
flag.Usage = usage
@@ -118,75 +79,63 @@ func main() {
remotewrite.InitSecretFlags()
buildinfo.Init()
logger.Init()
opentelemetry.Init()
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
if promscrape.IsDryRun() {
if err := promscrape.CheckConfig(); err != nil {
logger.Fatalf("error when checking -promscrape.config: %s", err)
}
logger.Infof("-promscrape.config is ok; exiting with 0 status code")
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
return
}
if *dryRun {
if err := promscrape.CheckConfig(); err != nil {
logger.Fatalf("error when checking -promscrape.config: %s", err)
}
if err := remotewrite.CheckRelabelConfigs(); err != nil {
logger.Fatalf("error when checking relabel configs: %s", err)
}
if err := remotewrite.CheckStreamAggrConfigs(); err != nil {
logger.Fatalf("error when checking -streamAggr.config and -remoteWrite.streamAggr.config: %s", err)
if err := promscrape.CheckConfig(); err != nil {
logger.Fatalf("error when checking -promscrape.config: %s", err)
}
logger.Infof("all the configs are ok; exiting with 0 status code")
logger.Infof("all the configs are ok; exitting with 0 status code")
return
}
listenAddrs := *httpListenAddrs
if len(listenAddrs) == 0 {
listenAddrs = []string{":8429"}
}
logger.Infof("starting vmagent at %q...", listenAddrs)
logger.Infof("starting vmagent at %q...", *httpListenAddr)
startTime := time.Now()
remotewrite.StartIngestionRateLimiter()
remotewrite.Init()
protoparserutil.StartUnmarshalWorkers()
common.StartUnmarshalWorkers()
writeconcurrencylimiter.Init()
if len(*influxListenAddr) > 0 {
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
return influx.InsertHandlerForReader(nil, r, "")
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
return influx.InsertHandlerForReader(r, false)
})
}
if len(*graphiteListenAddr) > 0 {
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, *graphiteUseProxyProtocol, graphite.InsertHandler)
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
}
if len(*opentsdbListenAddr) > 0 {
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, *opentsdbUseProxyProtocol, opentsdb.InsertHandler, httpInsertHandler)
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, opentsdbhttp.InsertHandler)
}
if len(*opentsdbHTTPListenAddr) > 0 {
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, opentsdbhttp.InsertHandler)
}
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
promscrape.Init(remotewrite.Push)
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{
UseProxyProtocol: useProxyProtocol,
})
if len(*httpListenAddr) > 0 {
go httpserver.Serve(*httpListenAddr, requestHandler)
}
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
pushmetrics.Init()
sig := procutil.WaitForSigterm()
logger.Infof("received signal %s", sig)
remotewrite.StopIngestionRateLimiter()
pushmetrics.Stop()
startTime = time.Now()
logger.Infof("gracefully shutting down webservice at %q", listenAddrs)
if err := httpserver.Stop(listenAddrs); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
if len(*httpListenAddr) > 0 {
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
}
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
}
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
promscrape.Stop()
@@ -202,71 +151,26 @@ func main() {
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttpServer.MustStop()
}
protoparserutil.StopUnmarshalWorkers()
common.StopUnmarshalWorkers()
remotewrite.Stop()
logger.Infof("successfully stopped vmagent in %.3f seconds", time.Since(startTime).Seconds())
}
func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
if !remotewrite.MultitenancyEnabled() {
return func(req *http.Request) error {
path := strings.ReplaceAll(req.URL.Path, "//", "/")
if path != "/api/put" {
return fmt.Errorf("unsupported path requested: %q; expecting '/api/put'", path)
}
return opentsdbhttp.InsertHandler(nil, req)
}
}
return func(req *http.Request) error {
path := strings.ReplaceAll(req.URL.Path, "//", "/")
at, err := getAuthTokenFromPath(path, req.Header)
if err != nil {
return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err)
}
return opentsdbhttp.InsertHandler(at, req)
}
}
func parsePath(path string, header http.Header) (*httpserver.Path, error) {
if *enableMultitenancyViaHeaders {
return httpserver.ParsePathAndHeaders(path, header)
}
return httpserver.ParsePath(path)
}
func getAuthTokenFromPath(path string, header http.Header) (*auth.Token, error) {
p, err := parsePath(path, header)
if err != nil {
return nil, fmt.Errorf("cannot parse multitenant path: %w", err)
}
if p.Prefix != "insert" {
return nil, fmt.Errorf(`unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
}
if p.Suffix != "opentsdb/api/put" {
return nil, fmt.Errorf("unsupported path requested: %q; expecting 'opentsdb/api/put'", p.Suffix)
}
return auth.NewTokenPossibleMultitenant(p.AuthToken)
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
if r.URL.Path == "/" {
if r.Method != http.MethodGet {
if r.Method != "GET" {
return false
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/vmagent.html'>https://docs.victoriametrics.com/vmagent.html</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
{"targets", "status for discovered active targets"},
{"service-discovery", "labels before and after relabeling for discovered targets"},
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -274,34 +178,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
path := strings.ReplaceAll(r.URL.Path, "//", "/")
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus") || strings.HasPrefix(path, "/api/v1/import/prometheus") {
prometheusimportRequests.Inc()
if err := prometheusimport.InsertHandler(nil, r); err != nil {
prometheusimportErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
statusCode := http.StatusNoContent
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus/metrics/job/") ||
strings.HasPrefix(path, "/api/v1/import/prometheus/metrics/job/") {
// Return 200 status code for pushgateway requests.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
statusCode = http.StatusOK
}
w.WriteHeader(statusCode)
return true
}
if strings.HasPrefix(path, "/datadog/") {
path := strings.Replace(r.URL.Path, "//", "/", -1)
if strings.HasPrefix(path, "datadog/") {
// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
path = strings.TrimSuffix(path, "/")
}
switch path {
case "/prometheus/api/v1/write", "/api/v1/write", "/api/v1/push", "/prometheus/api/v1/push":
if protoparserutil.HandleVMProtoServerHandshake(w, r) {
return true
}
case "/api/v1/write":
prometheusWriteRequests.Inc()
if err := promremotewrite.InsertHandler(nil, r); err != nil {
prometheusWriteErrors.Inc()
@@ -310,7 +194,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/prometheus/api/v1/import", "/api/v1/import":
case "/api/v1/import":
vmimportRequests.Inc()
if err := vmimport.InsertHandler(nil, r); err != nil {
vmimportErrors.Inc()
@@ -319,7 +203,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/prometheus/api/v1/import/csv", "/api/v1/import/csv":
case "/api/v1/import/csv":
csvimportRequests.Inc()
if err := csvimport.InsertHandler(nil, r); err != nil {
csvimportErrors.Inc()
@@ -328,7 +212,16 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/prometheus/api/v1/import/native", "/api/v1/import/native":
case "/api/v1/import/prometheus":
prometheusimportRequests.Inc()
if err := prometheusimport.InsertHandler(nil, r); err != nil {
prometheusimportErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/api/v1/import/native":
nativeimportRequests.Inc()
if err := native.InsertHandler(nil, r); err != nil {
nativeimportErrors.Inc()
@@ -337,7 +230,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/influx/write", "/influx/api/v2/write", "/write", "/api/v2/write":
case "/write", "/api/v2/write":
influxWriteRequests.Inc()
if err := influx.InsertHandlerForHTTP(nil, r); err != nil {
influxWriteErrors.Inc()
@@ -346,72 +239,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/influx/query", "/query":
case "/query":
influxQueryRequests.Inc()
influxutil.WriteDatabaseNames(w)
return true
case "/influx/health":
influxHealthRequests.Inc()
influxutil.WriteHealthCheckResponse(w)
return true
case "/opentelemetry/api/v1/push", "/opentelemetry/v1/metrics":
opentelemetryPushRequests.Inc()
if err := opentelemetry.InsertHandler(nil, r); err != nil {
opentelemetryPushErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/newrelic/inventory/deltas":
newrelicInventoryRequests.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
return true
case "/newrelic/infra/v2/metrics/events/bulk":
newrelicWriteRequests.Inc()
if err := newrelic.InsertHandlerForHTTP(nil, r); err != nil {
newrelicWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
influxutils.WriteDatabaseNames(w)
return true
case "/datadog/api/v1/series":
datadogv1WriteRequests.Inc()
if err := datadogv1.InsertHandlerForHTTP(nil, r); err != nil {
datadogv1WriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/api/v2/series":
datadogv2WriteRequests.Inc()
if err := datadogv2.InsertHandlerForHTTP(nil, r); err != nil {
datadogv2WriteErrors.Inc()
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
datadogWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
@@ -420,15 +255,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/api/beta/sketches":
datadogsketchesWriteRequests.Inc()
if err := datadogsketches.InsertHandlerForHTTP(nil, r); err != nil {
datadogsketchesWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(202)
return true
case "/datadog/api/v1/validate":
datadogValidateRequests.Inc()
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
@@ -452,31 +278,15 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, `{}`)
return true
case "/prometheus/targets", "/targets":
case "/targets":
promscrapeTargetsRequests.Inc()
promscrape.WriteHumanReadableTargetsStatus(w, r)
return true
case "/prometheus/service-discovery", "/service-discovery":
case "/service-discovery":
promscrapeServiceDiscoveryRequests.Inc()
promscrape.WriteServiceDiscovery(w, r)
return true
case "/prometheus/metric-relabel-debug", "/metric-relabel-debug":
promscrapeMetricRelabelDebugRequests.Inc()
promscrape.WriteMetricRelabelDebug(w, r)
return true
case "/prometheus/target-relabel-debug", "/target-relabel-debug":
promscrapeTargetRelabelDebugRequests.Inc()
promscrape.WriteTargetRelabelDebug(w, r)
return true
case "/prometheus/api/v1/targets", "/api/v1/targets":
promscrapeAPIV1TargetsRequests.Inc()
w.Header().Set("Content-Type", "application/json")
// https://prometheus.io/docs/prometheus/latest/querying/api/#targets
state := r.FormValue("state")
scrapePool := r.FormValue("scrapePool")
promscrape.WriteAPIV1Targets(w, state, scrapePool)
return true
case "/prometheus/target_response", "/target_response":
case "/target_response":
promscrapeTargetResponseRequests.Inc()
if err := promscrape.WriteTargetResponse(w, r); err != nil {
promscrapeTargetResponseErrors.Inc()
@@ -484,71 +294,48 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
return true
case "/prometheus/config", "/config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
case "/config":
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
return true
}
promscrapeConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
promscrape.WriteConfigData(w)
return true
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
case "/api/v1/status/config":
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
return true
}
promscrapeStatusConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%q}}`, bb.B)
return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
case "/api/v1/targets":
promscrapeAPIV1TargetsRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
state := r.FormValue("state")
promscrape.WriteAPIV1Targets(w, state)
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
}
case "/-/reload":
promscrapeConfigReloadRequests.Inc()
procutil.SelfSIGHUP()
w.WriteHeader(http.StatusOK)
return true
case "/ready":
if rdy := promscrape.PendingScrapeConfigs.Load(); rdy > 0 {
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
http.Error(w, errMsg, http.StatusTooEarly)
} else {
@@ -570,35 +357,18 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path string) bool {
p, err := parsePath(path, r.Header)
p, err := httpserver.ParsePath(path)
if err != nil {
// Cannot parse multitenant path. Skip it - probably it will be parsed later.
return false
}
if p.Prefix != "insert" {
// processMultitenantRequest is called for all unmatched path variants,
// but we should try parsing only /insert prefixed to avoid catching all possible paths.
return false
}
at, err := auth.NewTokenPossibleMultitenant(p.AuthToken)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain auth token: %s", err)
httpserver.Errorf(w, r, `unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
return true
}
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/import/prometheus") {
prometheusimportRequests.Inc()
if err := prometheusimport.InsertHandler(at, r); err != nil {
prometheusimportErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
statusCode := http.StatusNoContent
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/import/prometheus/metrics/job/") {
// Return 200 status code for pushgateway requests.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
statusCode = http.StatusOK
}
w.WriteHeader(statusCode)
at, err := auth.NewToken(p.AuthToken)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain auth token: %s", err)
return true
}
if strings.HasPrefix(p.Suffix, "datadog/") {
@@ -607,7 +377,7 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
p.Suffix = strings.TrimSuffix(p.Suffix, "/")
}
switch p.Suffix {
case "prometheus/", "prometheus", "prometheus/api/v1/write", "prometheus/api/v1/push":
case "prometheus/", "prometheus", "prometheus/api/v1/write":
prometheusWriteRequests.Inc()
if err := promremotewrite.InsertHandler(at, r); err != nil {
prometheusWriteErrors.Inc()
@@ -634,6 +404,15 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
w.WriteHeader(http.StatusNoContent)
return true
case "prometheus/api/v1/import/prometheus":
prometheusimportRequests.Inc()
if err := prometheusimport.InsertHandler(at, r); err != nil {
prometheusimportErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "prometheus/api/v1/import/native":
nativeimportRequests.Inc()
if err := native.InsertHandler(at, r); err != nil {
@@ -654,69 +433,12 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
return true
case "influx/query":
influxQueryRequests.Inc()
influxutil.WriteDatabaseNames(w)
return true
case "influx/health":
influxHealthRequests.Inc()
influxutil.WriteHealthCheckResponse(w)
return true
case "opentelemetry/api/v1/push", "opentelemetry/v1/metrics":
opentelemetryPushRequests.Inc()
if err := opentelemetry.InsertHandler(at, r); err != nil {
opentelemetryPushErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "newrelic/inventory/deltas":
newrelicInventoryRequests.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
return true
case "newrelic/infra/v2/metrics/events/bulk":
newrelicWriteRequests.Inc()
if err := newrelic.InsertHandlerForHTTP(at, r); err != nil {
newrelicWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
influxutils.WriteDatabaseNames(w)
return true
case "datadog/api/v1/series":
datadogv1WriteRequests.Inc()
if err := datadogv1.InsertHandlerForHTTP(at, r); err != nil {
datadogv1WriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "datadog/api/v2/series":
datadogv2WriteRequests.Inc()
if err := datadogv2.InsertHandlerForHTTP(at, r); err != nil {
datadogv2WriteErrors.Inc()
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
datadogWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
@@ -724,15 +446,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "datadog/api/beta/sketches":
datadogsketchesWriteRequests.Inc()
if err := datadogsketches.InsertHandlerForHTTP(at, r); err != nil {
datadogsketchesWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(202)
return true
case "datadog/api/v1/validate":
datadogValidateRequests.Inc()
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
@@ -781,42 +494,19 @@ var (
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/influx/write", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
influxHealthRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/health", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
datadogv1WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogv1WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogv2WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v2/series", protocol="datadog"}`)
datadogv2WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v2/series", protocol="datadog"}`)
datadogsketchesWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/beta/sketches", protocol="datadog"}`)
datadogsketchesWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/beta/sketches", protocol="datadog"}`)
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
datadogIntakeRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/intake", protocol="datadog"}`)
datadogMetadataRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/metadata", protocol="datadog"}`)
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicInventoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/inventory/deltas", protocol="newrelic"}`)
newrelicCheckRequest = metrics.NewCounter(`vm_http_requests_total{path="/newrelic", protocol="newrelic"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)
promscrapeMetricRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/metric-relabel-debug"}`)
promscrapeTargetRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target-relabel-debug"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
promscrapeTargetResponseRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target_response"}`)
promscrapeTargetResponseErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/target_response"}`)
@@ -824,12 +514,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
)
@@ -837,7 +521,7 @@ func usage() {
const s = `
vmagent collects metrics data via popular data ingestion protocols and routes it to VictoriaMetrics.
See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
See the docs at https://docs.victoriametrics.com/vmagent.html .
`
flagutil.Usage(s)
}

View File

@@ -1,13 +1,12 @@
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
ARG certs_image=non-existing
ARG root_image=non-existing
FROM $certs_image AS certs
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
ARG certs_image
ARG root_image
FROM $certs_image as certs
RUN apk --update --no-cache add ca-certificates
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
EXPOSE 8429
ENTRYPOINT ["/vmagent-prod"]
ARG TARGETARCH
ARG BINARY_SUFFIX=non-existing
COPY vmagent-linux-${TARGETARCH}-prod${BINARY_SUFFIX} ./vmagent-prod
COPY vmagent-${TARGETARCH}-prod ./vmagent-prod

View File

@@ -8,10 +8,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -25,17 +26,19 @@ var (
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(block *stream.Block) error {
return insertRows(at, block, extraLabels)
isGzip := req.Header.Get("Content-Encoding") == "gzip"
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
return insertRows(at, block, extraLabels)
})
})
}
func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label) error {
func insertRows(at *auth.Token, block *parser.Block, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -53,13 +56,13 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label)
samples := ctx.Samples[:0]
mn := &block.MetricName
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: bytesutil.ToUnsafeString(mn.MetricGroup),
})
for j := range mn.Tags {
tag := &mn.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
@@ -72,20 +75,18 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label)
}
samplesLen := len(samples)
for j, value := range values {
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: value,
Timestamp: timestamps[j],
})
}
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
return nil
}

View File

@@ -1,87 +0,0 @@
package newrelic
import (
"net/http"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="newrelic"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="newrelic"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="newrelic"}`)
)
// InsertHandlerForHTTP processes remote write for NewRelic POST /infra/v2/metrics/events/bulk request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []newrelic.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
samplesCount := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
tags := r.Tags
srcSamples := r.Samples
for j := range srcSamples {
s := &srcSamples[j]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
Name: "__name__",
Value: bytesutil.ToUnsafeString(s.Name),
})
for k := range tags {
t := &tags[k]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(t.Key),
Value: bytesutil.ToUnsafeString(t.Value),
})
}
samples = append(samples, prompb.Sample{
Value: s.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
labels = append(labels, extraLabels...)
}
samplesCount += len(srcSamples)
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(samplesCount)
if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount)
}
rowsPerInsert.Update(float64(samplesCount))
return nil
}

View File

@@ -1,102 +0,0 @@
package opentelemetry
import (
"fmt"
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="opentelemetry"}`)
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="opentelemetry"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="opentelemetry"}`)
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="opentelemetry"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// Init must be called after flag.Parse and before using the opentelemetry package.
func Init() {
stream.InitDecodeOptions()
}
// InsertHandlerForReader processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)
})
}
// InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
var processBody func([]byte) ([]byte, error)
if req.Header.Get("Content-Type") == "application/json" {
if req.Header.Get("X-Amz-Firehose-Protocol-Version") != "" {
processBody = firehose.ProcessRequestBody
} else {
return fmt.Errorf("json encoding isn't supported for opentelemetry format. Use protobuf encoding")
}
}
return stream.ParseStream(req.Body, encoding, processBody, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, extraLabels)
})
}
func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range tss {
ts := &tss[i]
rowsTotal += len(ts.Samples)
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, ts.Samples...)
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
ctx.WriteRequest.Metadata = mms
metadataTotal = len(mms)
}
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
metadataInserted.Add(metadataTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
metadataTenantInserted.Get(at).Add(metadataTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -5,9 +5,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,7 +20,9 @@ var (
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
func InsertHandler(r io.Reader) error {
return stream.Parse(r, insertRows)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertRows(rows []parser.Row) error {
@@ -33,22 +35,22 @@ func insertRows(rows []parser.Row) error {
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
@@ -56,9 +58,7 @@ func insertRows(rows []parser.Row) error {
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil

View File

@@ -5,11 +5,10 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,17 +19,19 @@ var (
// InsertHandler processes HTTP OpenTSDB put requests.
// See http://opentsdb.net/docs/build/html/api_http/put.html
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
func InsertHandler(req *http.Request) error {
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
return stream.Parse(req, func(rows []opentsdbhttp.Row) error {
return insertRows(at, rows, extraLabels)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(rows, extraLabels)
})
})
}
func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.Label) error {
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -40,23 +41,23 @@ func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.La
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
labels = append(labels, extraLabels...)
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
@@ -64,9 +65,7 @@ func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.La
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil

View File

@@ -1,102 +1,91 @@
package prometheusimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="prometheus"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="prometheus"}`)
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="prometheus"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="prometheus"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
)
// InsertHandler processes `/api/v1/import/prometheus` request.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
defaultTimestamp, err := protoparserutil.GetTimestamp(req)
defaultTimestamp, err := parserCommon.GetTimestamp(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
}, nil)
})
}
func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata, extraLabels []prompb.Label) error {
// InsertHandlerForReader processes metrics from given reader with optional gzip format
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
}, nil)
})
}
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
tssDst := ctx.WriteRequest.Timeseries[:0]
mmsDst := ctx.WriteRequest.Metadata[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
labels = append(labels, extraLabels...)
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
}
for i := range mms {
mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{
MetricFamilyName: mm.Metric,
Help: mm.Help,
Type: mm.Type,
// there is no unit in Prometheus exposition formats
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.WriteRequest.Metadata = mmsDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
rowsInserted.Add(len(rows))
metadataInserted.Add(len(mms))
if at != nil {
rowsTenantInserted.Get(at).Add(len(rows))
metadataTenantInserted.Get(at).Add(len(mms))
}
rowsPerInsert.Update(float64(len(rows)))
return nil

View File

@@ -1,60 +0,0 @@
package prometheusimport
import (
"bytes"
"flag"
"log"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
)
var (
srv *httptest.Server
testOutput *bytes.Buffer
)
func TestInsertHandler(t *testing.T) {
setUp()
defer tearDown()
req := httptest.NewRequest(http.MethodPost, "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
go_memstats_alloc_bytes_total 1`))
if err := InsertHandler(nil, req); err != nil {
t.Fatalf("unexpected error %s", err)
}
expectedMsg := "cannot unmarshal Prometheus line"
if !strings.Contains(testOutput.String(), expectedMsg) {
t.Fatalf("output %q should contain %q", testOutput.String(), expectedMsg)
}
}
func setUp() {
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(204)
}))
flag.Parse()
remoteWriteFlag := "remoteWrite.url"
if err := flag.Lookup(remoteWriteFlag).Value.Set(srv.URL); err != nil {
log.Fatalf("unable to set %q with value %q, err: %v", remoteWriteFlag, srv.URL, err)
}
logger.Init()
protoparserutil.StartUnmarshalWorkers()
remotewrite.Init()
testOutput = &bytes.Buffer{}
logger.SetOutputForTests(testOutput)
}
func tearDown() {
protoparserutil.StopUnmarshalWorkers()
srv.Close()
logger.ResetOutputForTest()
tmpDataDir := flag.Lookup("remoteWrite.tmpDataPath").Value.String()
fs.MustRemoveDir(tmpDataDir)
}

View File

@@ -1,46 +1,56 @@
package promremotewrite
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="promremotewrite"}`)
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="promremotewrite"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="promremotewrite"}`)
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="promremotewrite"}`)
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="promremotewrite"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="promremotewrite"}`)
)
// InsertHandler processes remote write for prometheus.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
isVMRemoteWrite := req.Header.Get("Content-Encoding") == "zstd"
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, extraLabels)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, extraLabels)
})
})
}
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
// InsertHandlerForReader processes metrics from given reader
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, nil)
})
})
}
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
mmsDst := ctx.WriteRequest.Metadata[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range timeseries {
@@ -49,56 +59,33 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
labelsLen := len(labels)
for i := range ts.Labels {
label := &ts.Labels[i]
labels = append(labels, prompb.Label{
Name: label.Name,
Value: label.Value,
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(label.Name),
Value: bytesutil.ToUnsafeString(label.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
for i := range ts.Samples {
sample := &ts.Samples[i]
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: sample.Value,
Timestamp: sample.Timestamp,
})
}
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
for i := range mms {
mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{
MetricFamilyName: mm.MetricFamilyName,
Help: mm.Help,
Type: mm.Type,
Unit: mm.Unit,
AccountID: mm.AccountID,
ProjectID: mm.ProjectID,
})
}
ctx.WriteRequest.Metadata = mmsDst
metadataTotal = len(mms)
}
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
metadataTenantInserted.Get(at).Add(metadataTotal)
}
metadataInserted.Add(metadataTotal)
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -2,112 +2,91 @@ package remotewrite
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/metrics"
)
var (
forcePromProto = flagutil.NewArrayBool("remoteWrite.forcePromProto", "Whether to force Prometheus remote write protocol for sending data "+
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
forceVMProto = flagutil.NewArrayBool("remoteWrite.forceVMProto", "Whether to force VictoriaMetrics remote write protocol for sending data "+
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to -remoteWrite.url. "+
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
"is sent after temporary unavailability of the remote storage")
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to -remoteWrite.url")
proxyURL = flagutil.NewArray("remoteWrite.proxyURL", "Optional proxy URL for writing data to -remoteWrite.url. Supported proxies: http, https, socks5. "+
"Example: -remoteWrite.proxyURL=socks5://proxy:1234")
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
"is sent after temporary unavailability of the remote storage. See also -maxIngestionRate")
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
// deprecated in the future. use -remoteWrite.retryMaxInterval instead
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. This flag is deprecated, use -remoteWrite.retryMaxInterval instead")
retryMaxInterval = flagutil.NewArrayDuration("remoteWrite.retryMaxInterval", time.Minute, "The maximum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. The delay doubles with each retry until this maximum is reached, after which it remains constant. See also -remoteWrite.retryMinInterval")
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to -remoteWrite.url")
tlsCertFile = flagutil.NewArray("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
tlsKeyFile = flagutil.NewArray("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
tlsCAFile = flagutil.NewArray("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
"By default system CA is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
tlsServerName = flagutil.NewArray("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to -remoteWrite.url. "+
"By default the server name from -remoteWrite.url is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url")
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to the corresponding -remoteWrite.url")
tlsCertFile = flagutil.NewArrayString("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
"to the corresponding -remoteWrite.url")
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
"By default, system CA is used")
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
"By default, the server name from -remoteWrite.url is used")
basicAuthUsername = flagutil.NewArray("remoteWrite.basicAuth.username", "Optional basic auth username to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
basicAuthPassword = flagutil.NewArray("remoteWrite.basicAuth.password", "Optional basic auth password to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
basicAuthPasswordFile = flagutil.NewArray("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for -remoteWrite.url. "+
"The file is re-read every second. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
bearerToken = flagutil.NewArray("remoteWrite.bearerToken", "Optional bearer auth token to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
bearerTokenFile = flagutil.NewArray("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for -remoteWrite.url. "+
"The token is re-read from the file every second. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
oauth2ClientID = flagutil.NewArray("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
oauth2ClientSecret = flagutil.NewArray("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
oauth2ClientSecretFile = flagutil.NewArray("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
oauth2TokenURL = flagutil.NewArray("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for -remoteWrite.url. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
oauth2Scopes = flagutil.NewArray("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for -remoteWrite.url. Scopes must be delimited by ';'. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
basicAuthUsernameFile = flagutil.NewArrayString("remoteWrite.basicAuth.usernameFile", "Optional path to basic auth username to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second")
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second")
bearerToken = flagutil.NewArrayString("remoteWrite.bearerToken", "Optional bearer auth token to use for the corresponding -remoteWrite.url")
bearerTokenFile = flagutil.NewArrayString("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for the corresponding -remoteWrite.url. "+
"The token is re-read from the file every second")
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
awsEC2Endpoint = flagutil.NewArrayString("remoteWrite.aws.ec2Endpoint", "Optional AWS EC2 API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsSTSEndpoint = flagutil.NewArrayString("remoteWrite.aws.stsEndpoint", "Optional AWS STS API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsRegion = flagutil.NewArrayString("remoteWrite.aws.region", "Optional AWS region to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsRoleARN = flagutil.NewArrayString("remoteWrite.aws.roleARN", "Optional AWS roleARN to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsAccessKey = flagutil.NewArrayString("remoteWrite.aws.accessKey", "Optional AWS AccessKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsService = flagutil.NewArrayString("remoteWrite.aws.service", "Optional AWS Service to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"Defaults to \"aps\"")
awsSecretKey = flagutil.NewArrayString("remoteWrite.aws.secretKey", "Optional AWS SecretKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for -remoteWrite.url. "+
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
awsRegion = flagutil.NewArray("remoteWrite.aws.region", "Optional AWS region to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
awsRoleARN = flagutil.NewArray("remoteWrite.aws.roleARN", "Optional AWS roleARN to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
awsAccessKey = flagutil.NewArray("remoteWrite.aws.accessKey", "Optional AWS AccessKey to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
awsService = flagutil.NewArray("remoteWrite.aws.serice", "Optional AWS Service to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url. Defaults to \"aps\".")
awsSecretKey = flagutil.NewArray("remoteWrite.aws.secretKey", "Optional AWS SecretKey to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
)
type client struct {
sanitizedURL string
remoteWriteURL string
// Whether to use VictoriaMetrics remote write protocol for sending the data to remoteWriteURL
useVMProto atomic.Bool
canDowngradeVMProto atomic.Bool
fq *persistentqueue.FastQueue
hc *http.Client
retryMinInterval time.Duration
retryMaxInterval time.Duration
fq *persistentqueue.FastQueue
hc *http.Client
sendBlock func(block []byte) bool
authCfg *promauth.Config
awsCfg *awsapi.Config
rl *ratelimiter.RateLimiter
rl rateLimiter
bytesSent *metrics.Counter
blocksSent *metrics.Counter
@@ -126,20 +105,22 @@ type client struct {
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
authCfg, err := getAuthConfig(argIdx)
if err != nil {
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
}
tlsCfg := authCfg.NewTLSConfig()
awsCfg, err := getAWSAPIConfig(argIdx)
if err != nil {
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
}
tr := &http.Transport{
DialContext: statDial,
TLSClientConfig: tlsCfg,
TLSHandshakeTimeout: 10 * time.Second,
MaxConnsPerHost: 2 * concurrency,
MaxIdleConnsPerHost: 2 * concurrency,
IdleConnTimeout: time.Minute,
WriteBufferSize: 64 * 1024,
}
tr := httputil.NewTransport(false, "vmagent_remotewrite")
tr.TLSHandshakeTimeout = tlsHandshakeTimeout.GetOptionalArg(argIdx)
tr.MaxConnsPerHost = 2 * concurrency
tr.MaxIdleConnsPerHost = 2 * concurrency
tr.IdleConnTimeout = time.Minute
tr.WriteBufferSize = 64 * 1024
pURL := proxyURL.GetOptionalArg(argIdx)
if len(pURL) > 0 {
if !strings.Contains(pURL, "://") {
@@ -151,52 +132,33 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
}
tr.Proxy = http.ProxyURL(pu)
}
hc := &http.Client{
Transport: authCfg.NewRoundTripper(tr),
Timeout: sendTimeout.GetOptionalArg(argIdx),
}
retryMaxIntervalFlag := retryMaxTime
if retryMaxInterval.String() != "" {
retryMaxIntervalFlag = retryMaxInterval
}
c := &client{
sanitizedURL: sanitizedURL,
remoteWriteURL: remoteWriteURL,
authCfg: authCfg,
awsCfg: awsCfg,
fq: fq,
hc: hc,
retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
retryMaxInterval: retryMaxIntervalFlag.GetOptionalArg(argIdx),
stopCh: make(chan struct{}),
sanitizedURL: sanitizedURL,
remoteWriteURL: remoteWriteURL,
authCfg: authCfg,
awsCfg: awsCfg,
fq: fq,
hc: &http.Client{
Transport: tr,
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
},
stopCh: make(chan struct{}),
}
c.sendBlock = c.sendBlockHTTP
useVMProto := forceVMProto.GetOptionalArg(argIdx)
usePromProto := forcePromProto.GetOptionalArg(argIdx)
if useVMProto && usePromProto {
logger.Fatalf("-remoteWrite.useVMProto and -remoteWrite.usePromProto cannot be set simultaneously for -remoteWrite.url=%s", sanitizedURL)
}
if !useVMProto && !usePromProto {
// The VM protocol could be downgraded later at runtime if unsupported media type response status is received.
useVMProto = true
c.canDowngradeVMProto.Store(true)
}
c.useVMProto.Store(useVMProto)
return c
}
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
c.rl = ratelimiter.New(int64(bytesPerSec), limitReached, c.stopCh)
c.rl.perSecondLimit = int64(bytesPerSec)
}
c.rl.limitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 {
return float64(rateLimit.GetOptionalArg(argIdx))
return float64(rateLimit.GetOptionalArgOrDefault(argIdx, 0))
})
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
@@ -204,11 +166,12 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency)
})
for range concurrency {
c.wg.Go(c.runWorker)
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
}
@@ -220,20 +183,13 @@ func (c *client) MustStop() {
}
func getAuthConfig(argIdx int) (*promauth.Config, error) {
headersValue := headers.GetOptionalArg(argIdx)
var hdrs []string
if headersValue != "" {
hdrs = strings.Split(headersValue, "^^")
}
username := basicAuthUsername.GetOptionalArg(argIdx)
usernameFile := basicAuthUsernameFile.GetOptionalArg(argIdx)
password := basicAuthPassword.GetOptionalArg(argIdx)
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
var basicAuthCfg *promauth.BasicAuthConfig
if username != "" || usernameFile != "" || password != "" || passwordFile != "" {
if username != "" || password != "" || passwordFile != "" {
basicAuthCfg = &promauth.BasicAuthConfig{
Username: username,
UsernameFile: usernameFile,
Password: promauth.NewSecret(password),
PasswordFile: passwordFile,
}
@@ -246,16 +202,10 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
if clientSecretFile != "" || clientSecret != "" {
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
if err != nil {
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
}
oauth2Cfg = &promauth.OAuth2Config{
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
ClientSecret: promauth.NewSecret(clientSecret),
ClientSecretFile: clientSecretFile,
EndpointParams: endpointParams,
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
}
@@ -269,17 +219,9 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
InsecureSkipVerify: tlsInsecureSkipVerify.GetOptionalArg(argIdx),
}
opts := &promauth.Options{
BasicAuth: basicAuthCfg,
BearerToken: token,
BearerTokenFile: tokenFile,
OAuth2: oauth2Cfg,
TLSConfig: tlsCfg,
Headers: hdrs,
}
authCfg, err := opts.NewConfig()
authCfg, err := promauth.NewConfig(".", nil, basicAuthCfg, token, tokenFile, oauth2Cfg, tlsCfg)
if err != nil {
return nil, fmt.Errorf("cannot populate auth config for remoteWrite idx: %d, err: %w", argIdx, err)
return nil, fmt.Errorf("cannot populate OAuth2 config for remoteWrite idx: %d, err: %w", argIdx, err)
}
return authCfg, nil
}
@@ -288,14 +230,12 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
if !awsUseSigv4.GetOptionalArg(argIdx) {
return nil, nil
}
ec2Endpoint := awsEC2Endpoint.GetOptionalArg(argIdx)
stsEndpoint := awsSTSEndpoint.GetOptionalArg(argIdx)
region := awsRegion.GetOptionalArg(argIdx)
roleARN := awsRoleARN.GetOptionalArg(argIdx)
accessKey := awsAccessKey.GetOptionalArg(argIdx)
secretKey := awsSecretKey.GetOptionalArg(argIdx)
service := awsService.GetOptionalArg(argIdx)
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "")
cfg, err := awsapi.NewConfig(region, roleARN, accessKey, secretKey, service)
if err != nil {
return nil, err
}
@@ -323,163 +263,102 @@ func (c *client) runWorker() {
continue
}
// Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
c.fq.MustWriteBlock(block)
return
case <-c.stopCh:
// c must be stopped. Wait up to 5 seconds for the in-flight request to complete.
// If it succeeds, drain the remaining in-memory queue before returning.
stopCtx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
// c must be stopped. Wait for a while in the hope the block will be sent.
graceDuration := 5 * time.Second
select {
case ok := <-ch:
if !ok {
// Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
} else {
c.drainInMemoryQueue(stopCtx, block[:0])
c.fq.MustWriteBlock(block)
}
case <-stopCtx.Done():
case <-time.After(graceDuration):
// Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
c.fq.MustWriteBlock(block)
}
return
}
}
}
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
req, err := c.newRequest(url, body)
if err != nil {
return nil, err
// sendBlockHTTP returns false only if c.stopCh is closed.
// Otherwise it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.register(len(block), c.stopCh)
retryDuration := time.Second
retriesCount := 0
c.bytesSent.Add(len(block))
c.blocksSent.Inc()
sigv4Hash := ""
if c.awsCfg != nil {
sigv4Hash = awsapi.HashHex(block)
}
resp, err := c.hc.Do(req)
if err == nil {
return resp, nil
}
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
return nil, err
}
// It is likely connection became stale or timed out during the first request.
// Make another attempt in hope request will succeed.
// If not, the error should be handled by the caller as usual.
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
req, err = c.newRequest(url, body)
if err != nil {
return nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.hc.Do(req)
if err != nil {
return nil, fmt.Errorf("second attempt: %w", err)
}
return resp, nil
}
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
reqBody := bytes.NewBuffer(body)
req, err := http.NewRequest(http.MethodPost, url, reqBody)
again:
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
}
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.sanitizedURL, err)
}
h := req.Header
h.Set("User-Agent", "vmagent")
h.Set("Content-Type", "application/x-protobuf")
if encoding.IsZstd(body) {
h.Set("Content-Encoding", "zstd")
h.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
} else {
h.Set("Content-Encoding", "snappy")
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
h.Set("Content-Encoding", "snappy")
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if ah := c.authCfg.GetAuthHeader(); ah != "" {
req.Header.Set("Authorization", ah)
}
if c.awsCfg != nil {
sigv4Hash := awsapi.HashHex(body)
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
// there is no need in retry, request will be rejected by client.Do and retried by code below
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
}
}
return req, nil
}
// sendBlockHTTP sends the given block to c.remoteWriteURL.
//
// The function returns false only if c.stopCh is closed.
// Otherwise, it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.Register(len(block))
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
retriesCount := 0
again:
startTime := time.Now()
resp, err := c.doRequest(c.remoteWriteURL, block)
resp, err := c.hc.Do(req)
c.requestDuration.UpdateDuration(startTime)
if err != nil {
c.errorsCount.Inc()
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
len(block), c.sanitizedURL, err, bt.CurrentDelay())
if !bt.Wait(c.stopCh) {
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
}
logger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.sanitizedURL, err, retryDuration.Seconds())
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
}
statusCode := resp.StatusCode
if statusCode/100 == 2 {
_ = resp.Body.Close()
c.requestsOKCount.Inc()
c.bytesSent.Add(len(block))
c.blocksSent.Inc()
return true
}
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
switch statusCode {
case 409:
logBlockRejected(block, c.sanitizedURL, resp)
// Just drop block on 409 status code like Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149
if statusCode == 409 || statusCode == 400 {
body, err := ioutil.ReadAll(resp.Body)
_ = resp.Body.Close()
c.packetsDropped.Inc()
return true
// - Remote Write v1 specification implicitly expects a `400 Bad Request` when the encoding is not supported.
// - Remote Write v2 specification explicitly specifies a `415 Unsupported Media Type` for unsupported encodings.
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400:
if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
zstdBlockLen := len(block)
block, err = repackBlockFromZstdToSnappy(block)
if err == nil {
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
c.retriesCount.Inc()
_ = resp.Body.Close()
goto again
}
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
zstdBlockLen, err)
l := logger.WithThrottler("remoteWriteRejected", 5*time.Second)
if err != nil {
l.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
"failed to read response body: %s",
len(block), c.sanitizedURL, statusCode, err)
} else {
l.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
len(block), c.sanitizedURL, statusCode, string(body))
}
// Just drop snappy blocks on 400 or 415 status codes like Prometheus does.
// Just drop block on 409 and 400 status codes like Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149
logBlockRejected(block, c.sanitizedURL, resp)
_ = resp.Body.Close()
c.packetsDropped.Inc()
return true
@@ -487,103 +366,68 @@ again:
// Unexpected status code returned
retriesCount++
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
// retryAfterDuration has the highest priority duration
if retryAfterHeader > 0 {
bt.SetDelay(retryAfterHeader)
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
}
// Handle response
body, err := io.ReadAll(resp.Body)
body, err := ioutil.ReadAll(resp.Body)
_ = resp.Body.Close()
if err != nil {
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
} else {
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
}
if !bt.Wait(c.stopCh) {
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
}
func (c *client) drainInMemoryQueue(stopCtx context.Context, block []byte) {
var ok bool
for {
select {
case <-stopCtx.Done():
return
default:
}
block, ok = c.fq.MustReadInMemoryBlock(block[:0])
if !ok {
// The in memory queue has already been drained,
// or persisted queue is being used.
// In this case it is guaranteed that fq will be empty
return
}
// at this stage c.stopCh should be closed
// so sendBlock function should not perform retries
if ok := c.sendBlock(block); !ok {
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
return
type rateLimiter struct {
perSecondLimit int64
// mu protects budget and deadline from concurrent access.
mu sync.Mutex
// The current budget. It is increased by perSecondLimit every second.
budget int64
// The next deadline for increasing the budget by perSecondLimit
deadline time.Time
limitReached *metrics.Counter
}
func (rl *rateLimiter) register(dataLen int, stopCh <-chan struct{}) {
limit := rl.perSecondLimit
if limit <= 0 {
return
}
rl.mu.Lock()
defer rl.mu.Unlock()
for rl.budget <= 0 {
if d := time.Until(rl.deadline); d > 0 {
rl.limitReached.Inc()
t := timerpool.Get(d)
select {
case <-stopCh:
timerpool.Put(t)
return
case <-t.C:
timerpool.Put(t)
}
}
rl.budget += limit
rl.deadline = time.Now().Add(time.Second)
}
}
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
//
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
// failed to properly update the persisted queue files. In such cases, zstd decompression
// will fail and an error will be returned.
//
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
if err != nil {
return nil, err
}
return snappy.Encode(nil, plainBlock), nil
}
func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
body, err := io.ReadAll(resp.Body)
if err != nil {
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
"failed to read response body: %s",
len(block), sanitizedURL, resp.StatusCode, err)
} else {
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
len(block), sanitizedURL, resp.StatusCode, string(body))
}
}
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
//
// s should be in either HTTP-date or a number of seconds.
// It returns time.Duration(0) if s does not follow RFC 7231.
func parseRetryAfterHeader(s string) time.Duration {
if s == "" {
return 0
}
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
}
// Retry-After could be in seconds.
if seconds, err := strconv.Atoi(s); err == nil {
return time.Duration(seconds) * time.Second
}
return 0
rl.budget -= int64(dataLen)
}

View File

@@ -1,102 +0,0 @@
package remotewrite
import (
"math"
"net/http"
"testing"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
)
func TestParseRetryAfterHeader(t *testing.T) {
f := func(retryAfterString string, expectResult time.Duration) {
t.Helper()
result := parseRetryAfterHeader(retryAfterString)
// expect `expectResult == result` when retryAfterString is in seconds or invalid
// expect the difference between result and expectResult to be lower than 10%
if !(expectResult == result || math.Abs(float64(expectResult-result))/float64(expectResult) < 0.10) {
t.Fatalf(
"incorrect retry after duration, want (ms): %d, got (ms): %d",
expectResult.Milliseconds(), result.Milliseconds(),
)
}
}
// retry after header in seconds
f("10", 10*time.Second)
// retry after header in date time
f(time.Now().Add(30*time.Second).UTC().Format(http.TimeFormat), 30*time.Second)
// retry after header invalid
f("invalid-retry-after", 0)
// retry after header not in GMT
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
}
func TestInitSecretFlags(t *testing.T) {
showRemoteWriteURLOrig := *showRemoteWriteURL
defer func() {
*showRemoteWriteURL = showRemoteWriteURLOrig
flagutil.UnregisterAllSecretFlags()
}()
flagutil.UnregisterAllSecretFlags()
*showRemoteWriteURL = false
InitSecretFlags()
if !flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("expecting remoteWrite.url to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to be secret")
}
flagutil.UnregisterAllSecretFlags()
*showRemoteWriteURL = true
InitSecretFlags()
if flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("remoteWrite.url must remain visible when -remoteWrite.showURL is set")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to remain secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to remain secret")
}
}
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
expectedPlainBlock := []byte(`foobar`)
zstdBlock := encoding.CompressZSTDLevel(nil, expectedPlainBlock, 1)
snappyBlock, err := repackBlockFromZstdToSnappy(zstdBlock)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
actualPlainBlock, err := snappy.Decode(nil, snappyBlock)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if string(actualPlainBlock) != string(expectedPlainBlock) {
t.Fatalf("unexpected plain block; got %q; want %q", actualPlainBlock, expectedPlainBlock)
}
}
func TestRepackBlockFromZstdToSnappyInvalidBlock(t *testing.T) {
snappyBlock, err := repackBlockFromZstdToSnappy([]byte("invalid zstd block"))
if err == nil {
t.Fatalf("expected error for invalid zstd block; got nil")
}
if len(snappyBlock) != 0 {
t.Fatalf("expected empty snappy block; got %d bytes", len(snappyBlock))
}
}

View File

@@ -7,30 +7,22 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
var (
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
"This option takes effect only when less than -remoteWrite.maxRowsPerBlock data points per -remoteWrite.flushInterval are pushed to -remoteWrite.url")
"This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url")
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock")
maxRowsPerBlock = flag.Int("remoteWrite.maxRowsPerBlock", 10000, "The maximum number of samples to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
maxMetadataPerBlock = flag.Int("remoteWrite.maxMetadataPerBlock", 5000, "The maximum number of metadata to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
vmProtoCompressLevel = flag.Int("remoteWrite.vmProtoCompressLevel", 0, "The compression level for VictoriaMetrics remote write protocol. "+
"Higher values reduce network traffic at the cost of higher CPU usage. Negative values reduce CPU usage at the cost of increased network traffic. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
)
type pendingSeries struct {
@@ -41,14 +33,17 @@ type pendingSeries struct {
periodicFlusherWG sync.WaitGroup
}
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Bool, significantFigures, roundDigits int) *pendingSeries {
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
var ps pendingSeries
ps.wr.fq = fq
ps.wr.isVMRemoteWrite = isVMRemoteWrite
ps.wr.pushBlock = pushBlock
ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher)
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps
}
@@ -57,18 +52,10 @@ func (ps *pendingSeries) MustStop() {
ps.periodicFlusherWG.Wait()
}
func (ps *pendingSeries) TryPushTimeSeries(tss []prompb.TimeSeries) bool {
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
ps.mu.Lock()
ok := ps.wr.tryPushTimeSeries(tss)
ps.wr.push(tss)
ps.mu.Unlock()
return ok
}
func (ps *pendingSeries) TryPushMetadata(mms []prompb.MetricMetadata) bool {
ps.mu.Lock()
ok := ps.wr.tryPushMetadata(mms)
ps.mu.Unlock()
return ok
}
func (ps *pendingSeries) periodicFlusher() {
@@ -76,111 +63,82 @@ func (ps *pendingSeries) periodicFlusher() {
if flushSeconds <= 0 {
flushSeconds = 1
}
d := timeutil.AddJitterToDuration(*flushInterval)
ticker := time.NewTicker(d)
ticker := time.NewTicker(*flushInterval)
defer ticker.Stop()
for {
mustStop := false
for !mustStop {
select {
case <-ps.stopCh:
ps.mu.Lock()
ps.wr.mustFlushOnStop()
ps.mu.Unlock()
return
mustStop = true
case <-ticker.C:
if fasttime.UnixTimestamp()-ps.wr.lastFlushTime.Load() < uint64(flushSeconds) {
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
continue
}
}
ps.mu.Lock()
_ = ps.wr.tryFlush()
ps.wr.flush()
ps.mu.Unlock()
}
}
type writeRequest struct {
lastFlushTime atomic.Uint64
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
lastFlushTime uint64
// The queue to send blocks to.
fq *persistentqueue.FastQueue
// pushBlock is called when whe write request is ready to be sent.
pushBlock func(block []byte)
// Whether to encode the write request with VictoriaMetrics remote write protocol.
isVMRemoteWrite *atomic.Bool
// How many significant figures must be left before sending the writeRequest to fq.
// How many significant figures must be left before sending the writeRequest to pushBlock.
significantFigures int
// How many decimal digits after point must be left before sending the writeRequest to fq.
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
roundDigits int
wr prompb.WriteRequest
wr prompbmarshal.WriteRequest
tss []prompb.TimeSeries
mms []prompb.MetricMetadata
labels []prompb.Label
samples []prompb.Sample
tss []prompbmarshal.TimeSeries
// buf holds labels data
buf []byte
// metadatabuf holds metadata data
metadatabuf []byte
labels []prompbmarshal.Label
samples []prompbmarshal.Sample
buf []byte
}
func (wr *writeRequest) reset() {
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are reused.
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
wr.wr.Timeseries = nil
wr.wr.Metadata = nil
clear(wr.tss)
for i := range wr.tss {
ts := &wr.tss[i]
ts.Labels = nil
ts.Samples = nil
}
wr.tss = wr.tss[:0]
clear(wr.mms)
wr.mms = wr.mms[:0]
promrelabel.CleanLabels(wr.labels)
wr.labels = wr.labels[:0]
wr.samples = wr.samples[:0]
wr.buf = wr.buf[:0]
wr.metadatabuf = wr.metadatabuf[:0]
}
// mustFlushOnStop force pushes wr data into wr.fq
//
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
func (wr *writeRequest) mustFlushOnStop() {
func (wr *writeRequest) flush() {
wr.wr.Timeseries = wr.tss
wr.wr.Metadata = wr.mms
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite.Load()) {
logger.Panicf("BUG: final flush must always return true")
}
wr.adjustSampleValues()
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
pushWriteRequest(&wr.wr, wr.pushBlock)
wr.reset()
}
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
return true
}
func (wr *writeRequest) tryFlush() bool {
wr.wr.Timeseries = wr.tss
wr.wr.Metadata = wr.mms
wr.lastFlushTime.Store(fasttime.UnixTimestamp())
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite.Load()) {
return false
}
wr.reset()
return true
}
func adjustSampleValues(samples []prompb.Sample, significantFigures, roundDigits int) {
if n := significantFigures; n > 0 {
func (wr *writeRequest) adjustSampleValues() {
samples := wr.samples
if n := wr.significantFigures; n > 0 {
for i := range samples {
s := &samples[i]
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
}
}
if n := roundDigits; n < 100 {
if n := wr.roundDigits; n < 100 {
for i := range samples {
s := &samples[i]
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
@@ -188,239 +146,100 @@ func adjustSampleValues(samples []prompb.Sample, significantFigures, roundDigits
}
}
func (wr *writeRequest) tryPushMetadata(mms []prompb.MetricMetadata) bool {
mmdDst := wr.mms
maxMetadataPerBlock := *maxMetadataPerBlock
for i := range mms {
if len(wr.mms) >= maxMetadataPerBlock {
if !wr.tryFlush() {
return false
}
mmdDst = wr.mms
}
mmSrc := &mms[i]
mmdDst = append(mmdDst, prompb.MetricMetadata{})
wr.copyMetadata(&mmdDst[len(mmdDst)-1], mmSrc)
}
wr.mms = mmdDst
return true
}
func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
// Direct copy for non-string fields, which are safe by value.
dst.Type = src.Type
dst.Unit = src.Unit
dst.AccountID = src.AccountID
dst.ProjectID = src.ProjectID
// Pre-allocate memory for all string fields.
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
bufLen := len(wr.metadatabuf)
wr.metadatabuf = slicesutil.SetLength(wr.metadatabuf, bufLen+neededBufLen)
buf := wr.metadatabuf[:bufLen]
// Copy MetricFamilyName
bufLen = len(buf)
buf = append(buf, src.MetricFamilyName...)
dst.MetricFamilyName = bytesutil.ToUnsafeString(buf[bufLen:])
// Copy Help
bufLen = len(buf)
buf = append(buf, src.Help...)
dst.Help = bytesutil.ToUnsafeString(buf[bufLen:])
wr.metadatabuf = buf
}
func (wr *writeRequest) tryPushTimeSeries(src []prompb.TimeSeries) bool {
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
tssDst := wr.tss
maxSamplesPerBlock := *maxRowsPerBlock
// Allow up to 10x of labels per each block on average.
maxLabelsPerBlock := 10 * maxSamplesPerBlock
for i := range src {
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
wr.tss = tssDst
if !wr.tryFlush() {
return false
}
wr.flush()
tssDst = wr.tss
}
tsSrc := &src[i]
adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
tssDst = append(tssDst, prompb.TimeSeries{})
wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
}
wr.tss = tssDst
return true
}
func (wr *writeRequest) copyTimeSeries(dst, src *prompb.TimeSeries) {
labelsSrc := src.Labels
// Pre-allocate memory for labels.
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
labelsDst := wr.labels
labelsLen := len(wr.labels)
wr.labels = slicesutil.SetLength(wr.labels, labelsLen+len(labelsSrc))
labelsDst := wr.labels[labelsLen:]
samplesDst := wr.samples
buf := wr.buf
for i := range src.Labels {
labelsDst = append(labelsDst, prompbmarshal.Label{})
dstLabel := &labelsDst[len(labelsDst)-1]
srcLabel := &src.Labels[i]
// Pre-allocate memory for byte slice needed for storing label names and values.
neededBufLen := 0
for i := range labelsSrc {
label := &labelsSrc[i]
neededBufLen += len(label.Name) + len(label.Value)
}
bufLen := len(wr.buf)
wr.buf = slicesutil.SetLength(wr.buf, bufLen+neededBufLen)
buf := wr.buf[:bufLen]
// Copy labels
for i := range labelsSrc {
dstLabel := &labelsDst[i]
srcLabel := &labelsSrc[i]
bufLen := len(buf)
buf = append(buf, srcLabel.Name...)
dstLabel.Name = bytesutil.ToUnsafeString(buf[bufLen:])
bufLen = len(buf)
dstLabel.Name = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Name):])
buf = append(buf, srcLabel.Value...)
dstLabel.Value = bytesutil.ToUnsafeString(buf[bufLen:])
dstLabel.Value = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Value):])
}
wr.buf = buf
dst.Labels = labelsDst
dst.Labels = labelsDst[labelsLen:]
// Copy samples
samplesLen := len(wr.samples)
wr.samples = append(wr.samples, src.Samples...)
dst.Samples = wr.samples[samplesLen:]
samplesDst = append(samplesDst, src.Samples...)
dst.Samples = samplesDst[len(samplesDst)-len(src.Samples):]
wr.samples = samplesDst
wr.labels = labelsDst
wr.buf = buf
}
// marshalConcurrency limits the maximum number of concurrent workers, which marshal and compress WriteRequest.
var marshalConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs())
func tryPushWriteRequest(wr *prompb.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
if wr.IsEmpty() {
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte)) {
if len(wr.Timeseries) == 0 {
// Nothing to push
return true
return
}
marshalConcurrencyCh <- struct{}{}
bb := writeRequestBufPool.Get()
bb.B = wr.MarshalProtobuf(bb.B[:0])
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
zb := compressBufPool.Get()
if isVMRemoteWrite {
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, *vmProtoCompressLevel)
} else {
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
}
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
if len(bb.B) <= maxUnpackedBlockSize.N {
zb := snappyBufPool.Get()
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
writeRequestBufPool.Put(bb)
<-marshalConcurrencyCh
if len(zb.B) <= persistentqueue.MaxBlockSize {
zbLen := len(zb.B)
ok := tryPushBlock(zb.B)
compressBufPool.Put(zb)
if ok {
blockSizeRows.Update(float64(len(wr.Timeseries)))
blockMetadataRows.Update(float64(len(wr.Metadata)))
blockSizeBytes.Update(float64(zbLen))
}
return ok
pushBlock(zb.B)
blockSizeRows.Update(float64(len(wr.Timeseries)))
blockSizeBytes.Update(float64(len(zb.B)))
snappyBufPool.Put(zb)
return
}
compressBufPool.Put(zb)
snappyBufPool.Put(zb)
} else {
writeRequestBufPool.Put(bb)
<-marshalConcurrencyCh
}
// Split timeseries or metadata into two smaller blocks
switch len(wr.Timeseries) {
case 0:
if len(wr.Metadata) == 1 {
logger.Warnf("dropping a metadata exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
return true
}
metadata := wr.Metadata
n := len(metadata) / 2
wr.Metadata = metadata[:n]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Metadata = metadata
return false
}
wr.Metadata = metadata[n:]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Metadata = metadata
return false
}
wr.Metadata = metadata
return true
case 1:
// A single time series left. Recursively split its samples and metadata into smaller parts if possible.
// Too big block. Recursively split it into smaller parts if possible.
if len(wr.Timeseries) == 1 {
// A single time series left. Recursively split its samples into smaller parts if possible.
samples := wr.Timeseries[0].Samples
metaData := wr.Metadata
if len(samples) == 1 && len(metaData) <= 1 {
logger.Warnf("dropping a sample for metric and %d metadata which are exceeding -remoteWrite.maxBlockSize=%d bytes", len(metaData), maxUnpackedBlockSize.N)
return true
if len(samples) == 1 {
logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
return
}
n := len(samples) / 2
m := len(metaData) / 2
wr.Timeseries[0].Samples = samples[:n]
wr.Metadata = metaData[:m]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Timeseries[0].Samples = samples
wr.Metadata = metaData
return false
}
pushWriteRequest(wr, pushBlock)
wr.Timeseries[0].Samples = samples[n:]
wr.Metadata = metaData[m:]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Timeseries[0].Samples = samples
wr.Metadata = metaData
return false
}
pushWriteRequest(wr, pushBlock)
wr.Timeseries[0].Samples = samples
wr.Metadata = metaData
return true
default:
// Split both timeseries and metadata.
timeseries := wr.Timeseries
metaData := wr.Metadata
n := len(timeseries) / 2
m := len(metaData) / 2
wr.Timeseries = timeseries[:n]
wr.Metadata = metaData[:m]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Timeseries = timeseries
wr.Metadata = metaData
return false
}
wr.Timeseries = timeseries[n:]
wr.Metadata = metaData[m:]
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
wr.Timeseries = timeseries
wr.Metadata = metaData
return false
}
wr.Timeseries = timeseries
wr.Metadata = metaData
return true
return
}
timeseries := wr.Timeseries
n := len(timeseries) / 2
wr.Timeseries = timeseries[:n]
pushWriteRequest(wr, pushBlock)
wr.Timeseries = timeseries[n:]
pushWriteRequest(wr, pushBlock)
wr.Timeseries = timeseries
}
var (
blockSizeBytes = metrics.NewHistogram(`vmagent_remotewrite_block_size_bytes`)
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
blockMetadataRows = metrics.NewHistogram(`vmagent_remotewrite_block_metadata_rows`)
blockSizeBytes = metrics.NewHistogram(`vmagent_remotewrite_block_size_bytes`)
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
)
var (
writeRequestBufPool bytesutil.ByteBufferPool
compressBufPool bytesutil.ByteBufferPool
)
var writeRequestBufPool bytesutil.ByteBufferPool
var snappyBufPool bytesutil.ByteBufferPool

View File

@@ -1,73 +0,0 @@
package remotewrite
import (
"fmt"
"math"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
func TestPushWriteRequest(t *testing.T) {
rowsCounts := []int{1, 10, 100, 1e3, 1e4}
expectedBlockLensProm := []int{216, 1848, 16424, 169882, 1757876}
expectedBlockLensVM := []int{138, 492, 3927, 34995, 288476}
for i, rowsCount := range rowsCounts {
expectedBlockLenProm := expectedBlockLensProm[i]
expectedBlockLenVM := expectedBlockLensVM[i]
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
testPushWriteRequest(t, rowsCount, expectedBlockLenProm, expectedBlockLenVM)
})
}
}
func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expectedBlockLenVM int) {
f := func(isVMRemoteWrite bool, expectedBlockLen int, tolerancePrc float64) {
t.Helper()
wr := newTestWriteRequest(rowsCount, 20)
pushBlockLen := 0
pushBlock := func(block []byte) bool {
if pushBlockLen > 0 {
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
}
pushBlockLen = len(block)
return true
}
if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
t.Fatalf("cannot push data to remote storage")
}
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
}
}
// Check Prometheus remote write
f(false, expectedBlockLenProm, 3)
// Check VictoriaMetrics remote write
f(true, expectedBlockLenVM, 15)
}
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest
for i := range seriesCount {
var labels []prompb.Label
for j := range labelsCount {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j),
})
}
wr.Timeseries = append(wr.Timeseries, prompb.TimeSeries{
Labels: labels,
Samples: []prompb.Sample{
{
Value: float64(i),
Timestamp: 1000 * int64(i),
},
},
})
}
return &wr
}

View File

@@ -1,35 +0,0 @@
package remotewrite
import (
"fmt"
"testing"
"github.com/golang/snappy"
"github.com/klauspost/compress/s2"
)
func BenchmarkCompressWriteRequestSnappy(b *testing.B) {
b.Run("snappy", func(b *testing.B) {
benchmarkCompressWriteRequest(b, snappy.Encode)
})
b.Run("s2", func(b *testing.B) {
benchmarkCompressWriteRequest(b, s2.EncodeSnappy)
})
}
func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []byte) []byte) {
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
wr := newTestWriteRequest(rowsCount, 10)
data := wr.MarshalProtobuf(nil)
b.ReportAllocs()
b.SetBytes(int64(rowsCount))
b.RunParallel(func(pb *testing.PB) {
var zb []byte
for pb.Next() {
zb = compressFunc(zb[:cap(zb)], data)
}
})
})
}
}

View File

@@ -3,55 +3,31 @@ package remotewrite
import (
"flag"
"fmt"
"io"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
"See https://docs.victoriametrics.com/victoriametrics/relabeling/")
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
"See https://docs.victoriametrics.com/victoriametrics/relabeling/")
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. "+
"The path can point either to local file or to http url. These entries are applied to all the metrics "+
"before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details")
relabelDebugGlobal = flag.Bool("remoteWrite.relabelDebug", false, "Whether to log metrics before and after relabeling with -remoteWrite.relabelConfig. "+
"If the -remoteWrite.relabelDebug is enabled, then the metrics aren't sent to remote storage. This is useful for debugging the relabeling configs")
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url. "+
"The path can point either to local file or to http url")
relabelDebug = flagutil.NewArrayBool("remoteWrite.urlRelabelDebug", "Whether to log metrics before and after relabeling with -remoteWrite.urlRelabelConfig. "+
"If the -remoteWrite.urlRelabelDebug is enabled, then the metrics aren't sent to the corresponding -remoteWrite.url. "+
"This is useful for debugging the relabeling configs")
)
var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge
relabelConfigTimestamp *metrics.Counter
)
func initRelabelMetrics() {
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
relabelConfigSuccess = metrics.NewGauge(`vmagent_relabel_config_last_reload_successful`, nil)
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
}
var labelsGlobal []prompbmarshal.Label
// CheckRelabelConfigs checks -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig.
func CheckRelabelConfigs() error {
@@ -59,115 +35,31 @@ func CheckRelabelConfigs() error {
return err
}
func initRelabelConfigs() {
rcs, err := loadRelabelConfigs()
if err != nil {
logger.Fatalf("cannot initialize relabel configs: %s", err)
}
allRelabelConfigs.Store(rcs)
if rcs.isSet() {
initRelabelMetrics()
relabelConfigSuccess.Set(1)
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
}
}
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load()
if !rcs.isSet() {
return
}
relabelConfigReloads.Inc()
logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
rcs, err := loadRelabelConfigs()
if err != nil {
relabelConfigReloadErrors.Inc()
relabelConfigSuccess.Set(0)
logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
return
}
allRelabelConfigs.Store(rcs)
relabelConfigSuccess.Set(1)
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
logger.Infof("successfully reloaded relabel configs")
}
func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs
if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal, *relabelDebugGlobal)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
}
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global
}
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
if len(*relabelConfigPaths) > (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url or -remoteWrite.multitenantURL args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)))
}
var urlRelabelCfgs []any
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
rcs.perURL = make([]*promrelabel.ParsedConfigs, (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
// Skip empty relabel config.
continue
}
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
prc, err := promrelabel.LoadRelabelConfigs(path, relabelDebug.GetOptionalArg(i))
if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
}
rcs.perURL[i] = prc
var parsedCfg any
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil
}
@@ -176,11 +68,6 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs
}
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
}
// initLabelsGlobal must be called after parsing command-line flags.
func initLabelsGlobal() {
labelsGlobal = nil
@@ -192,35 +79,40 @@ func initLabelsGlobal() {
if n < 0 {
logger.Fatalf("missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
}
labelsGlobal = append(labelsGlobal, prompb.Label{
labelsGlobal = append(labelsGlobal, prompbmarshal.Label{
Name: s[:n],
Value: s[n+1:],
})
}
}
func (rctx *relabelCtx) applyRelabeling(tss []prompb.TimeSeries, pcs *promrelabel.ParsedConfigs) []prompb.TimeSeries {
if pcs.Len() == 0 && !*usePromCompatibleNaming {
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
if len(extraLabels) == 0 && pcs.Len() == 0 {
// Nothing to change.
return tss
}
rctx.reset()
tssDst := tss[:0]
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
labels = pcs.Apply(labels, labelsLen)
labels = promrelabel.FinalizeLabels(labels[:labelsLen], labels[labelsLen:])
// extraLabels must be added before applying relabeling according to https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
for j := range extraLabels {
extraLabel := &extraLabels[j]
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
if tmp != nil {
tmp.Value = extraLabel.Value
} else {
labels = append(labels, *extraLabel)
}
}
labels = pcs.Apply(labels, labelsLen, true)
if len(labels) == labelsLen {
// Drop the current time series, since relabeling removed all the labels.
continue
}
if *usePromCompatibleNaming {
fixPromCompatibleNaming(labels[labelsLen:])
}
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: ts.Samples,
})
@@ -229,61 +121,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompb.TimeSeries, pcs *promrelabe
return tssDst
}
func (rctx *relabelCtx) appendExtraLabels(tss []prompb.TimeSeries, extraLabels []prompb.Label) {
if len(extraLabels) == 0 {
return
}
rctx.reset()
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
for j := range extraLabels {
extraLabel := extraLabels[j]
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
if tmp != nil {
tmp.Value = extraLabel.Value
} else {
labels = append(labels, extraLabel)
}
}
ts.Labels = labels[labelsLen:]
}
rctx.labels = labels
}
func (rctx *relabelCtx) tenantToLabels(tss []prompb.TimeSeries, accountID, projectID uint32) {
rctx.reset()
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
for _, label := range ts.Labels {
labelName := label.Name
if labelName == "vm_account_id" || labelName == "vm_project_id" {
continue
}
labels = append(labels, label)
}
labels = append(labels, prompb.Label{
Name: "vm_account_id",
Value: accountIDStr,
})
labels = append(labels, prompb.Label{
Name: "vm_project_id",
Value: projectIDStr,
})
ts.Labels = labels[labelsLen:]
}
rctx.labels = labels
}
type relabelCtx struct {
// pool for labels, which are used during the relabeling.
labels []prompb.Label
labels []prompbmarshal.Label
}
func (rctx *relabelCtx) reset() {
@@ -292,7 +132,7 @@ func (rctx *relabelCtx) reset() {
}
var relabelCtxPool = &sync.Pool{
New: func() any {
New: func() interface{} {
return &relabelCtx{}
},
}
@@ -302,18 +142,6 @@ func getRelabelCtx() *relabelCtx {
}
func putRelabelCtx(rctx *relabelCtx) {
rctx.reset()
rctx.labels = rctx.labels[:0]
relabelCtxPool.Put(rctx)
}
func fixPromCompatibleNaming(labels []prompb.Label) {
// Replace unsupported Prometheus chars in label names and metric names with underscores.
for i := range labels {
label := &labels[i]
if label.Name == "__name__" {
label.Value = promrelabel.SanitizeMetricName(label.Value)
} else {
label.Name = promrelabel.SanitizeLabelName(label.Name)
}
}
}

View File

@@ -1,69 +0,0 @@
package remotewrite
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
func TestApplyRelabeling(t *testing.T) {
f := func(pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
rctx := &relabelCtx{}
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
gotTss := rctx.applyRelabeling(tss, pcs)
if !reflect.DeepEqual(gotTss, expTss) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
}
}
f(nil, "up", "up")
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
- target_label: "foo"
replacement: "aaa"
- action: labeldrop
regex: "env.*"
`))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
f(pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
oldVal := *usePromCompatibleNaming
*usePromCompatibleNaming = true
f(nil, `foo.bar`, `foo_bar`)
*usePromCompatibleNaming = oldVal
}
func TestAppendExtraLabels(t *testing.T) {
f := func(extraLabels []prompb.Label, sTss, sExpTss string) {
t.Helper()
rctx := &relabelCtx{}
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
rctx.appendExtraLabels(tss, extraLabels)
if !reflect.DeepEqual(tss, expTss) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, tss)
}
}
f(nil, "up", "up")
f([]prompb.Label{{Name: "foo", Value: "bar"}}, "up", `up{foo="bar"}`)
f([]prompb.Label{{Name: "foo", Value: "bar"}}, `up{foo="baz"}`, `up{foo="bar"}`)
f([]prompb.Label{{Name: "baz", Value: "qux"}}, `up{foo="baz"}`, `up{foo="baz",baz="qux"}`)
oldVal := *usePromCompatibleNaming
*usePromCompatibleNaming = true
f([]prompb.Label{{Name: "foo.bar", Value: "baz"}}, "up", `up{foo.bar="baz"}`)
*usePromCompatibleNaming = oldVal
}
func parseSeries(data string) []prompb.TimeSeries {
var tss []prompb.TimeSeries
tss = append(tss, prompb.TimeSeries{
Labels: promutil.MustNewLabelsFromString(data).GetLabels(),
})
return tss
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,378 +0,0 @@
package remotewrite
import (
"fmt"
"math"
"reflect"
"strconv"
"sync/atomic"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/metrics"
)
func TestGetLabelsHash_Distribution(t *testing.T) {
f := func(bucketsCount int) {
t.Helper()
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
itemsCount := 10_000 * bucketsCount
m := make([]int, bucketsCount)
var labels []prompb.Label
for i := range itemsCount {
labels = append(labels[:0], prompb.Label{
Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i),
})
for j := range 10 {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j),
})
}
h := getLabelsHash(labels)
m[h%uint64(bucketsCount)]++
}
// Verify that the distribution is even
expectedItemsPerBucket := float64(itemsCount / bucketsCount)
allowedDeviation := math.Round(float64(expectedItemsPerBucket) * 0.04)
for _, n := range m {
if math.Abs(expectedItemsPerBucket-float64(n)) > allowedDeviation {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want in range [%.0f, %.0f]",
bucketsCount, n, expectedItemsPerBucket-allowedDeviation, expectedItemsPerBucket+allowedDeviation)
}
}
}
f(2)
f(3)
f(4)
f(5)
f(10)
}
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil {
t.Fatalf("cannot load relabel configs: %s", err)
}
rcs := &relabelConfigs{
perURL: []*promrelabel.ParsedConfigs{
perURLRelabel,
},
}
allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{}
isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{
idx: 0,
streamAggrKeepInput: keepInput,
streamAggrDropInput: dropInput,
pss: pss,
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
}
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
}
if streamAggrConfig != "" {
pushNoop := func(_ []prompb.TimeSeries) {}
opts := streamaggr.Options{
EnableWindows: enableWindows,
}
sas, err := streamaggr.LoadFromData([]byte(streamAggrConfig), pushNoop, &opts, "global")
if err != nil {
t.Fatalf("cannot load streamaggr configs: %s", err)
}
defer sas.MustStop()
rwctx.sas.Store(sas)
}
offsetMsecs := time.Now().UnixMilli()
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries
copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx")
}
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
}
}
// relabeling
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 2, 2)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="foo"} 20
metric{env="dev"} 15
metric{env="foo"} 25
`, 4, 0)
}
func TestShardAmountRemoteWriteCtx(t *testing.T) {
// 1. distribute 100000 series to n nodes.
// 2. remove the last node from healthy list.
// 3. distribute the same 10000 series to (n-1) node again.
// 4. check active time series change rate:
// change rate must < (3/total nodes). e.g. +30% if 10 you have 10 nodes.
f := func(remoteWriteCount int, healthyIdx []int, replicas int) {
t.Helper()
defer func() {
rwctxsGlobal = nil
rwctxsGlobalIdx = nil
rwctxConsistentHashGlobal = nil
}()
rwctxsGlobal = make([]*remoteWriteCtx, remoteWriteCount)
rwctxsGlobalIdx = make([]int, remoteWriteCount)
rwctxs := make([]*remoteWriteCtx, 0, len(healthyIdx))
for i := range remoteWriteCount {
rwCtx := &remoteWriteCtx{
idx: i,
}
rwctxsGlobalIdx[i] = i
if i >= len(healthyIdx) {
rwctxsGlobal[i] = rwCtx
continue
}
hIdx := healthyIdx[i]
if hIdx != i {
rwctxs = append(rwctxs, &remoteWriteCtx{
idx: hIdx,
})
} else {
rwctxs = append(rwctxs, rwCtx)
}
rwctxsGlobal[i] = rwCtx
}
seriesCount := 100000
// build 1000000 series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := range seriesCount {
tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{
{
Name: "label",
Value: strconv.Itoa(i),
},
},
Samples: []prompb.Sample{
{
Timestamp: 0,
Value: 0,
},
},
})
}
// build consistent hash for x remote write context
// build active time series set
nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := range remoteWriteCount {
nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{})
}
rwctxConsistentHashGlobal = consistenthash.NewConsistentHash(nodes, 0)
// create shards
x := getTSSShards(len(rwctxs))
shards := x.shards
// execute
shardAmountRemoteWriteCtx(tssBlock, shards, rwctxs, replicas)
for i, nodeIdx := range healthyIdx {
for _, ts := range shards[i] {
// add it to node[nodeIdx]'s active time series
activeTimeSeriesByNodes[nodeIdx][prompb.LabelsToString(ts.Labels)] = struct{}{}
}
}
totalActiveTimeSeries := 0
for _, activeTimeSeries := range activeTimeSeriesByNodes {
totalActiveTimeSeries += len(activeTimeSeries)
}
avgActiveTimeSeries1 := totalActiveTimeSeries / remoteWriteCount
putTSSShards(x)
// removed last node
rwctxs = rwctxs[:len(rwctxs)-1]
healthyIdx = healthyIdx[:len(healthyIdx)-1]
x = getTSSShards(len(rwctxs))
shards = x.shards
// execute
shardAmountRemoteWriteCtx(tssBlock, shards, rwctxs, replicas)
for i, nodeIdx := range healthyIdx {
for _, ts := range shards[i] {
// add it to node[nodeIdx]'s active time series
activeTimeSeriesByNodes[nodeIdx][prompb.LabelsToString(ts.Labels)] = struct{}{}
}
}
totalActiveTimeSeries = 0
for _, activeTimeSeries := range activeTimeSeriesByNodes {
totalActiveTimeSeries += len(activeTimeSeries)
}
avgActiveTimeSeries2 := totalActiveTimeSeries / remoteWriteCount
changed := math.Abs(float64(avgActiveTimeSeries2-avgActiveTimeSeries1) / float64(avgActiveTimeSeries1))
threshold := 3 / float64(remoteWriteCount)
if changed >= threshold {
t.Fatalf("average active time series before: %d, after: %d, changed: %.2f. threshold: %.2f", avgActiveTimeSeries1, avgActiveTimeSeries2, changed, threshold)
}
}
f(5, []int{0, 1, 2, 3, 4}, 1)
f(5, []int{0, 1, 2, 3, 4}, 2)
f(10, []int{0, 1, 2, 3, 4, 5, 6, 7, 9}, 1)
f(10, []int{0, 1, 2, 3, 4, 5, 6, 7, 9}, 3)
}
func TestCalculateHealthyRwctxIdx(t *testing.T) {
f := func(total int, healthyIdx []int, unhealthyIdx []int) {
t.Helper()
healthyMap := make(map[int]bool)
for _, idx := range healthyIdx {
healthyMap[idx] = true
}
rwctxsGlobal = make([]*remoteWriteCtx, total)
rwctxsGlobalIdx = make([]int, total)
rwctxs := make([]*remoteWriteCtx, 0, len(healthyIdx))
for i := range rwctxsGlobal {
rwctx := &remoteWriteCtx{idx: i}
rwctxsGlobal[i] = rwctx
if healthyMap[i] {
rwctxs = append(rwctxs, rwctx)
}
rwctxsGlobalIdx[i] = i
}
gotHealthyIdx, gotUnhealthyIdx := calculateHealthyRwctxIdx(rwctxs)
if !reflect.DeepEqual(healthyIdx, gotHealthyIdx) {
t.Errorf("calculateHealthyRwctxIdx want healthyIdx = %v, got %v", healthyIdx, gotHealthyIdx)
}
if !reflect.DeepEqual(unhealthyIdx, gotUnhealthyIdx) {
t.Errorf("calculateHealthyRwctxIdx want unhealthyIdx = %v, got %v", unhealthyIdx, gotUnhealthyIdx)
}
}
f(5, []int{0, 1, 2, 3, 4}, nil)
f(5, []int{0, 1, 2, 4}, []int{3})
f(5, []int{2, 4}, []int{0, 1, 3})
f(5, []int{0, 2, 4}, []int{1, 3})
f(5, []int{}, []int{0, 1, 2, 3, 4})
f(5, []int{4}, []int{0, 1, 2, 3})
f(1, []int{0}, nil)
f(1, []int{}, []int{0})
}

View File

@@ -0,0 +1,92 @@
package remotewrite
import (
"context"
"net"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
)
func getStdDialer() *net.Dialer {
stdDialerOnce.Do(func() {
stdDialer = &net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
DualStack: netutil.TCP6Enabled(),
}
})
return stdDialer
}
var (
stdDialer *net.Dialer
stdDialerOnce sync.Once
)
func statDial(ctx context.Context, networkUnused, addr string) (conn net.Conn, err error) {
network := netutil.GetTCPNetwork()
d := getStdDialer()
conn, err = d.DialContext(ctx, network, addr)
dialsTotal.Inc()
if err != nil {
dialErrors.Inc()
return nil, err
}
conns.Inc()
sc := &statConn{
Conn: conn,
}
return sc, nil
}
var (
dialsTotal = metrics.NewCounter(`vmagent_remotewrite_dials_total`)
dialErrors = metrics.NewCounter(`vmagent_remotewrite_dial_errors_total`)
conns = metrics.NewCounter(`vmagent_remotewrite_conns`)
)
type statConn struct {
closed uint64
net.Conn
}
func (sc *statConn) Read(p []byte) (int, error) {
n, err := sc.Conn.Read(p)
connReadsTotal.Inc()
if err != nil {
connReadErrors.Inc()
}
connBytesRead.Add(n)
return n, err
}
func (sc *statConn) Write(p []byte) (int, error) {
n, err := sc.Conn.Write(p)
connWritesTotal.Inc()
if err != nil {
connWriteErrors.Inc()
}
connBytesWritten.Add(n)
return n, err
}
func (sc *statConn) Close() error {
err := sc.Conn.Close()
if atomic.AddUint64(&sc.closed, 1) == 1 {
conns.Dec()
}
return err
}
var (
connReadsTotal = metrics.NewCounter(`vmagent_remotewrite_conn_reads_total`)
connWritesTotal = metrics.NewCounter(`vmagent_remotewrite_conn_writes_total`)
connReadErrors = metrics.NewCounter(`vmagent_remotewrite_conn_read_errors_total`)
connWriteErrors = metrics.NewCounter(`vmagent_remotewrite_conn_write_errors_total`)
connBytesRead = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_read_total`)
connBytesWritten = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_written_total`)
)

View File

@@ -1,258 +0,0 @@
package remotewrite
import (
"flag"
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/metrics"
)
var (
// Global config
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
streamAggrGlobalIgnoreOldSamples = flag.Bool("streamAggr.ignoreOldSamples", false, "Whether to ignore input samples with old timestamps outside the "+
"current aggregation interval for aggregator. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples")
streamAggrGlobalIgnoreFirstIntervals = flag.Int("streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start for "+
"aggregator. Increase this value if you observe incorrect aggregation results after vmagent restarts. It could be caused by receiving unordered delayed data from "+
"clients pushing data into the vmagent. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignore-aggregation-intervals-on-start")
streamAggrGlobalDropInputLabels = flagutil.NewArrayString("streamAggr.dropInputLabels", "An optional list of labels to drop from samples for aggregator "+
"before stream de-duplication and aggregation . See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#dropping-unneeded-labels")
streamAggrGlobalEnableWindows = flag.Bool("streamAggr.enableWindows", false, "Enables aggregation within fixed windows for all global aggregators. "+
"This allows to get more precise results, but impacts resource usage as it requires twice more memory to store two states. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows.")
// Per URL config
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
streamAggrIgnoreOldSamples = flagutil.NewArrayBool("remoteWrite.streamAggr.ignoreOldSamples", "Whether to ignore input samples with old timestamps outside the current "+
"aggregation interval for the corresponding -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples")
streamAggrIgnoreFirstIntervals = flagutil.NewArrayInt("remoteWrite.streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start "+
"for the corresponding -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. Increase this value if "+
"you observe incorrect aggregation results after vmagent restarts. It could be caused by receiving buffered delayed data from clients pushing data into the vmagent. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignore-aggregation-intervals-on-start")
streamAggrDropInputLabels = flagutil.NewArrayString("remoteWrite.streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+
"before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. "+
"Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#dropping-unneeded-labels")
streamAggrEnableWindows = flagutil.NewArrayBool("remoteWrite.streamAggr.enableWindows", "Enables aggregation within fixed windows for all remote write's aggregators. "+
"This allows to get more precise results, but impacts resource usage as it requires twice more memory to store two states. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows.")
)
// CheckStreamAggrConfigs checks -remoteWrite.streamAggr.config and -streamAggr.config.
func CheckStreamAggrConfigs() error {
// Check global config
sas, err := newStreamAggrConfigGlobal()
if err != nil {
return err
}
sas.MustStop()
if len(*streamAggrConfig) > len(*remoteWriteURLs) {
return fmt.Errorf("too many -remoteWrite.streamAggr.config args: %d; it mustn't exceed the number of -remoteWrite.url args: %d", len(*streamAggrConfig), len(*remoteWriteURLs))
}
pushNoop := func(_ []prompb.TimeSeries) {}
for idx := range *streamAggrConfig {
sas, err := newStreamAggrConfigPerURL(idx, pushNoop)
if err != nil {
return err
}
sas.MustStop()
}
return nil
}
func reloadStreamAggrConfigs() {
reloadStreamAggrConfigGlobal()
for _, rwctx := range rwctxsGlobal {
rwctx.reloadStreamAggrConfig()
}
}
func reloadStreamAggrConfigGlobal() {
path := *streamAggrGlobalConfig
if path == "" {
return
}
logger.Infof("reloading stream aggregation configs pointed by -streamAggr.config=%q", path)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, path)).Inc()
sasNew, err := newStreamAggrConfigGlobal()
if err != nil {
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, path)).Inc()
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(0)
logger.Errorf("cannot reload -streamAggr.config=%q; continue using the previously loaded config; error: %s", path, err)
return
}
sas := sasGlobal.Load()
if !sasNew.Equal(sas) {
sasOld := sasGlobal.Swap(sasNew)
sasOld.MustStop()
logger.Infof("successfully reloaded -streamAggr.config=%q", path)
} else {
sasNew.MustStop()
logger.Infof("-streamAggr.config=%q wasn't changed since the last reload", path)
}
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(1)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, path)).Set(fasttime.UnixTimestamp())
}
func initStreamAggrConfigGlobal() {
sas, err := newStreamAggrConfigGlobal()
if err != nil {
logger.Fatalf("cannot initialize global stream aggregators: %s", err)
}
if sas != nil {
filePath := sas.FilePath()
sasGlobal.Store(sas)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, filePath)).Set(1)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, filePath)).Set(fasttime.UnixTimestamp())
}
dedupInterval := *streamAggrGlobalDedupInterval
if dedupInterval > 0 {
deduplicatorGlobal = streamaggr.NewDeduplicator(pushTimeSeriesToRemoteStoragesTrackDropped, *streamAggrGlobalEnableWindows, dedupInterval, *streamAggrGlobalDropInputLabels, "dedup-global")
}
}
func (rwctx *remoteWriteCtx) initStreamAggrConfig() {
idx := rwctx.idx
sas, err := rwctx.newStreamAggrConfig()
if err != nil {
logger.Fatalf("cannot initialize stream aggregators: %s", err)
}
if sas != nil {
filePath := sas.FilePath()
rwctx.sas.Store(sas)
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(idx)
rwctx.streamAggrDropInput = streamAggrDropInput.GetOptionalArg(idx)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, filePath)).Set(1)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, filePath)).Set(fasttime.UnixTimestamp())
}
dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx)
if dedupInterval > 0 {
alias := fmt.Sprintf("dedup-%d", idx+1)
var dropLabels []string
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
}
rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, *streamAggrGlobalEnableWindows, dedupInterval, dropLabels, alias)
}
}
func (rwctx *remoteWriteCtx) reloadStreamAggrConfig() {
path := streamAggrConfig.GetOptionalArg(rwctx.idx)
if path == "" {
return
}
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", path)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, path)).Inc()
sasNew, err := rwctx.newStreamAggrConfig()
if err != nil {
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, path)).Inc()
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(0)
logger.Errorf("cannot reload -remoteWrite.streamAggr.config=%q; continue using the previously loaded config; error: %s", path, err)
return
}
sas := rwctx.sas.Load()
if !sasNew.Equal(sas) {
sasOld := rwctx.sas.Swap(sasNew)
sasOld.MustStop()
logger.Infof("successfully reloaded -remoteWrite.streamAggr.config=%q", path)
} else {
sasNew.MustStop()
logger.Infof("-remoteWrite.streamAggr.config=%q wasn't changed since the last reload", path)
}
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(1)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, path)).Set(fasttime.UnixTimestamp())
}
func newStreamAggrConfigGlobal() (*streamaggr.Aggregators, error) {
path := *streamAggrGlobalConfig
if path == "" {
return nil, nil
}
opts := &streamaggr.Options{
DedupInterval: *streamAggrGlobalDedupInterval,
DropInputLabels: *streamAggrGlobalDropInputLabels,
IgnoreOldSamples: *streamAggrGlobalIgnoreOldSamples,
IgnoreFirstIntervals: *streamAggrGlobalIgnoreFirstIntervals,
KeepInput: *streamAggrGlobalKeepInput,
EnableWindows: *streamAggrGlobalEnableWindows,
}
sas, err := streamaggr.LoadFromFile(path, pushTimeSeriesToRemoteStoragesTrackDropped, opts, "global")
if err != nil {
return nil, fmt.Errorf("cannot load -streamAggr.config=%q: %w", *streamAggrGlobalConfig, err)
}
return sas, nil
}
func (rwctx *remoteWriteCtx) newStreamAggrConfig() (*streamaggr.Aggregators, error) {
return newStreamAggrConfigPerURL(rwctx.idx, rwctx.pushInternalTrackDropped)
}
func newStreamAggrConfigPerURL(idx int, pushFunc streamaggr.PushFunc) (*streamaggr.Aggregators, error) {
path := streamAggrConfig.GetOptionalArg(idx)
if path == "" {
return nil, nil
}
alias := fmt.Sprintf("%d:secret-url", idx+1)
if *showRemoteWriteURL {
alias = fmt.Sprintf("%d:%s", idx+1, remoteWriteURLs.GetOptionalArg(idx))
}
var dropLabels []string
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
}
opts := &streamaggr.Options{
DedupInterval: streamAggrDedupInterval.GetOptionalArg(idx),
DropInputLabels: dropLabels,
IgnoreOldSamples: streamAggrIgnoreOldSamples.GetOptionalArg(idx),
IgnoreFirstIntervals: streamAggrIgnoreFirstIntervals.GetOptionalArg(idx),
KeepInput: streamAggrKeepInput.GetOptionalArg(idx),
EnableWindows: streamAggrEnableWindows.GetOptionalArg(idx),
}
sas, err := streamaggr.LoadFromFile(path, pushFunc, opts, alias)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.streamAggr.config=%q: %w", path, err)
}
return sas, nil
}

View File

@@ -1,6 +1,7 @@
package vmimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -8,11 +9,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -26,17 +27,28 @@ var (
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []vmimport.Row) error {
return insertRows(at, rows, extraLabels)
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
})
}
func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label) error {
// InsertHandlerForReader processes metrics from given reader
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
})
})
}
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -50,7 +62,7 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
@@ -63,12 +75,12 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
}
samplesLen := len(samples)
for j, value := range values {
samples = append(samples, prompb.Sample{
samples = append(samples, prompbmarshal.Sample{
Value: value,
Timestamp: timestamps[j],
})
}
tssDst = append(tssDst, prompb.TimeSeries{
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
@@ -76,9 +88,7 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := len(rows)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, prompb.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -1,3 +0,0 @@
See vmalert-tool docs [here](https://docs.victoriametrics.com/victoriametrics/vmalert-tool/).
vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmalert-tool.md).

View File

@@ -1,8 +0,0 @@
ARG base_image=non-existing
FROM $base_image
EXPOSE 8880
ENTRYPOINT ["/vmalert-tool-prod"]
ARG src_binary=non-existing
COPY $src_binary ./vmalert-tool-prod

View File

@@ -1,13 +0,0 @@
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
ARG certs_image=non-existing
ARG root_image=non-existing
FROM $certs_image AS certs
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
EXPOSE 8429
ENTRYPOINT ["/vmalert-tool-prod"]
ARG TARGETARCH
ARG BINARY_SUFFIX=non-existing
COPY vmalert-tool-linux-${TARGETARCH}-prod${BINARY_SUFFIX} ./vmalert-tool-prod

View File

@@ -12,23 +12,20 @@ vmalert-prod:
vmalert-pure-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-pure
vmalert-linux-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-amd64
vmalert-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-amd64
vmalert-linux-arm-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-arm
vmalert-arm-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-arm
vmalert-linux-arm64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-arm64
vmalert-arm64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-arm64
vmalert-linux-ppc64le-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-ppc64le
vmalert-ppc64le-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-ppc64le
vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-386
vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
@@ -36,12 +33,6 @@ vmalert-darwin-amd64-prod:
vmalert-darwin-arm64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-arm64
vmalert-freebsd-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-freebsd-amd64
vmalert-openbsd-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-openbsd-amd64
vmalert-windows-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-windows-amd64
@@ -71,13 +62,11 @@ publish-vmalert:
test-vmalert:
go test -v -race -cover ./app/vmalert -loggerLevel=ERROR
go test -v -race -cover ./app/vmalert/rule
go test -v -race -cover ./app/vmalert/templates
go test -v -race -cover ./app/vmalert/datasource
go test -v -race -cover ./app/vmalert/notifier
go test -v -race -cover ./app/vmalert/config
go test -v -race -cover ./app/vmalert/remotewrite
go test -v -race -cover ./app/vmalert/vmalertutil
run-vmalert: vmalert
./bin/vmalert -rule=app/vmalert/config/testdata/rules/rules2-good.rules \
@@ -104,43 +93,29 @@ replay-vmalert: vmalert
-remoteWrite.url=http://localhost:8428 \
-external.label=cluster=east-1 \
-external.label=replica=a \
-replay.timeFrom=2024-06-01T00:00:00Z
-replay.timeFrom=2021-05-11T07:21:43Z \
-replay.timeTo=2021-05-29T18:40:43Z
vmalert-linux-amd64:
APP_NAME=vmalert CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-amd64:
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmalert-local-with-goarch
vmalert-linux-arm:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
vmalert-arm:
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmalert-local-with-goarch
vmalert-linux-arm64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmalert-arm64:
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmalert-local-with-goarch
vmalert-linux-ppc64le:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
vmalert-ppc64le:
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmalert-local-with-goarch
vmalert-linux-s390x:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
vmalert-386:
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmalert-local-with-goarch
vmalert-linux-loong64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch
vmalert-linux-386:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
vmalert-darwin-amd64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-darwin-arm64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmalert-freebsd-amd64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-openbsd-amd64:
APP_NAME=vmalert CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-windows-amd64:
GOARCH=amd64 APP_NAME=vmalert $(MAKE) app-local-windows-goarch
vmalert-local-with-goarch:
APP_NAME=vmalert $(MAKE) app-local-with-goarch
vmalert-pure:
APP_NAME=vmalert $(MAKE) app-local-pure
vmalert-windows-amd64:
GOARCH=amd64 APP_NAME=vmalert $(MAKE) app-local-windows-with-goarch

File diff suppressed because it is too large Load Diff

625
app/vmalert/alerting.go Normal file
View File

@@ -0,0 +1,625 @@
package main
import (
"context"
"fmt"
"hash/fnv"
"sort"
"strconv"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// AlertingRule is basic alert entity
type AlertingRule struct {
Type datasource.Type
RuleID uint64
Name string
Expr string
For time.Duration
Labels map[string]string
Annotations map[string]string
GroupID uint64
GroupName string
EvalInterval time.Duration
q datasource.Querier
// guard status fields
mu sync.RWMutex
// stores list of active alerts
alerts map[uint64]*notifier.Alert
// stores last moment of time Exec was called
lastExecTime time.Time
// stores the duration of the last Exec call
lastExecDuration time.Duration
// stores last error that happened in Exec func
// resets on every successful Exec
// may be used as Health state
lastExecError error
// stores the number of samples returned during
// the last evaluation
lastExecSamples int
metrics *alertingRuleMetrics
}
type alertingRuleMetrics struct {
errors *utils.Gauge
pending *utils.Gauge
active *utils.Gauge
samples *utils.Gauge
}
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
ar := &AlertingRule{
Type: group.Type,
RuleID: cfg.ID,
Name: cfg.Alert,
Expr: cfg.Expr,
For: cfg.For.Duration(),
Labels: cfg.Labels,
Annotations: cfg.Annotations,
GroupID: group.ID(),
GroupName: group.Name,
EvalInterval: group.Interval,
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: &group.Type,
EvaluationInterval: group.Interval,
QueryParams: group.Params,
}),
alerts: make(map[uint64]*notifier.Alert),
metrics: &alertingRuleMetrics{},
}
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
func() float64 {
ar.mu.RLock()
defer ar.mu.RUnlock()
var num int
for _, a := range ar.alerts {
if a.State == notifier.StatePending {
num++
}
}
return float64(num)
})
ar.metrics.active = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_firing{%s}`, labels),
func() float64 {
ar.mu.RLock()
defer ar.mu.RUnlock()
var num int
for _, a := range ar.alerts {
if a.State == notifier.StateFiring {
num++
}
}
return float64(num)
})
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
func() float64 {
ar.mu.RLock()
defer ar.mu.RUnlock()
if ar.lastExecError == nil {
return 0
}
return 1
})
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
func() float64 {
ar.mu.RLock()
defer ar.mu.RUnlock()
return float64(ar.lastExecSamples)
})
return ar
}
// Close unregisters rule metrics
func (ar *AlertingRule) Close() {
ar.metrics.active.Unregister()
ar.metrics.pending.Unregister()
ar.metrics.errors.Unregister()
ar.metrics.samples.Unregister()
}
// String implements Stringer interface
func (ar *AlertingRule) String() string {
return ar.Name
}
// ID returns unique Rule ID
// within the parent Group.
func (ar *AlertingRule) ID() uint64 {
return ar.RuleID
}
type labelSet struct {
// origin labels from series
// used for templating
origin map[string]string
// processed labels with additional data
// used as Alert labels
processed map[string]string
}
// toLabels converts labels from given Metric
// to labelSet which contains original and processed labels.
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
ls := &labelSet{
origin: make(map[string]string, len(m.Labels)),
processed: make(map[string]string),
}
for _, l := range m.Labels {
// drop __name__ to be consistent with Prometheus alerting
if l.Name == "__name__" {
continue
}
ls.origin[l.Name] = l.Value
ls.processed[l.Name] = l.Value
}
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin,
Value: m.Values[0],
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
for k, v := range extraLabels {
ls.processed[k] = v
}
// set additional labels to identify group and rule name
if ar.Name != "" {
ls.processed[alertNameLabel] = ar.Name
}
if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.processed[alertGroupNameLabel] = ar.GroupName
}
return ls, nil
}
// ExecRange executes alerting rule on the given time range similarly to Exec.
// It doesn't update internal states of the Rule and meant to be used just
// to get time series for backfilling.
// It returns ALERT and ALERT_FOR_STATE time series as result.
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
series, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
if err != nil {
return nil, err
}
var result []prompbmarshal.TimeSeries
qFn := func(query string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
}
for _, s := range series {
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
if err != nil {
return nil, fmt.Errorf("failed to create alert: %s", err)
}
if ar.For == 0 { // if alert is instant
a.State = notifier.StateFiring
for i := range s.Values {
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
}
continue
}
// if alert with For > 0
prevT := time.Time{}
for i := range s.Values {
at := time.Unix(s.Timestamps[i], 0)
if at.Sub(prevT) > ar.EvalInterval {
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
a.ActiveAt = at
} else if at.Sub(a.ActiveAt) >= ar.For {
a.State = notifier.StateFiring
a.Start = at
}
prevT = at
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
}
}
return result, nil
}
// resolvedRetention is the duration for which a resolved alert instance
// is kept in memory state and consequently repeatedly sent to the AlertManager.
const resolvedRetention = 15 * time.Minute
// Exec executes AlertingRule expression via the given Querier.
// Based on the Querier results AlertingRule maintains notifier.Alerts
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
start := time.Now()
qMetrics, err := ar.q.Query(ctx, ar.Expr, ts)
ar.mu.Lock()
defer ar.mu.Unlock()
ar.lastExecTime = start
ar.lastExecDuration = time.Since(start)
ar.lastExecError = err
ar.lastExecSamples = len(qMetrics)
if err != nil {
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
}
for h, a := range ar.alerts {
// cleanup inactive alerts from previous Exec
if a.State == notifier.StateInactive && ts.Sub(a.ResolvedAt) > resolvedRetention {
delete(ar.alerts, h)
}
}
qFn := func(query string) ([]datasource.Metric, error) { return ar.q.Query(ctx, query, ts) }
updated := make(map[uint64]struct{})
// update list of active alerts
for _, m := range qMetrics {
ls, err := ar.toLabels(m, qFn)
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
h := hash(ls.processed)
if _, ok := updated[h]; ok {
// duplicate may be caused by extra labels
// conflicting with the metric labels
ar.lastExecError = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
return nil, ar.lastExecError
}
updated[h] = struct{}{}
if a, ok := ar.alerts[h]; ok {
if a.State == notifier.StateInactive {
// alert could be in inactive state for resolvedRetention
// so when we again receive metrics for it - we switch it
// back to notifier.StatePending
a.State = notifier.StatePending
a.ActiveAt = ts
}
if a.Value != m.Values[0] {
// update Value field with latest value
a.Value = m.Values[0]
// and re-exec template since Value can be used
// in annotations
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
if err != nil {
return nil, err
}
}
continue
}
a, err := ar.newAlert(m, ls, ar.lastExecTime, qFn)
if err != nil {
ar.lastExecError = err
return nil, fmt.Errorf("failed to create alert: %w", err)
}
a.ID = h
a.State = notifier.StatePending
a.ActiveAt = ts
ar.alerts[h] = a
}
var numActivePending int
for h, a := range ar.alerts {
// if alert wasn't updated in this iteration
// means it is resolved already
if _, ok := updated[h]; !ok {
if a.State == notifier.StatePending {
// alert was in Pending state - it is not
// active anymore
delete(ar.alerts, h)
continue
}
if a.State == notifier.StateFiring {
a.State = notifier.StateInactive
a.ResolvedAt = ts
}
continue
}
numActivePending++
if a.State == notifier.StatePending && ts.Sub(a.ActiveAt) >= ar.For {
a.State = notifier.StateFiring
a.Start = ts
alertsFired.Inc()
}
}
if limit > 0 && numActivePending > limit {
ar.alerts = map[uint64]*notifier.Alert{}
return nil, fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
}
return ar.toTimeSeries(ts.Unix()), nil
}
func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
for _, a := range ar.alerts {
if a.State == notifier.StateInactive {
continue
}
ts := ar.alertToTimeSeries(a, timestamp)
tss = append(tss, ts...)
}
return tss
}
// UpdateWith copies all significant fields.
// alerts state isn't copied since
// it should be updated in next 2 Execs
func (ar *AlertingRule) UpdateWith(r Rule) error {
nr, ok := r.(*AlertingRule)
if !ok {
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
}
ar.Expr = nr.Expr
ar.For = nr.For
ar.Labels = nr.Labels
ar.Annotations = nr.Annotations
ar.EvalInterval = nr.EvalInterval
ar.q = nr.q
return nil
}
// TODO: consider hashing algorithm in VM
func hash(labels map[string]string) uint64 {
hash := fnv.New64a()
keys := make([]string, 0, len(labels))
for k := range labels {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
// drop __name__ to be consistent with Prometheus alerting
if k == "__name__" {
continue
}
name, value := k, labels[k]
hash.Write([]byte(name))
hash.Write([]byte(value))
hash.Write([]byte("\xff"))
}
return hash.Sum64()
}
func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.Time, qFn templates.QueryFn) (*notifier.Alert, error) {
var err error
if ls == nil {
ls, err = ar.toLabels(m, qFn)
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
}
a := &notifier.Alert{
GroupID: ar.GroupID,
Name: ar.Name,
Labels: ls.processed,
Value: m.Values[0],
ActiveAt: start,
Expr: ar.Expr,
}
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
return a, err
}
// AlertAPI generates APIAlert object from alert by its id(hash)
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
ar.mu.RLock()
defer ar.mu.RUnlock()
a, ok := ar.alerts[id]
if !ok {
return nil
}
return ar.newAlertAPI(*a)
}
// ToAPI returns Rule representation in form
// of APIRule
func (ar *AlertingRule) ToAPI() APIRule {
r := APIRule{
Type: "alerting",
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: ar.lastExecTime,
EvaluationTime: ar.lastExecDuration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: ar.lastExecSamples,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
}
if ar.lastExecError != nil {
r.LastError = ar.lastExecError.Error()
r.Health = "err"
}
// satisfy APIRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// AlertsToAPI generates list of APIAlert objects from existing alerts
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
var alerts []*APIAlert
ar.mu.RLock()
for _, a := range ar.alerts {
if a.State == notifier.StateInactive {
continue
}
alerts = append(alerts, ar.newAlertAPI(*a))
}
ar.mu.RUnlock()
return alerts
}
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
aa := &APIAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
RuleID: fmt.Sprintf("%d", ar.RuleID),
Name: a.Name,
Expression: ar.Expr,
Labels: a.Labels,
Annotations: a.Annotations,
State: a.State.String(),
ActiveAt: a.ActiveAt,
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(a)
}
return aa
}
const (
// alertMetricName is the metric name for synthetic alert timeseries.
alertMetricName = "ALERTS"
// alertForStateMetricName is the metric name for 'for' state of alert.
alertForStateMetricName = "ALERTS_FOR_STATE"
// alertNameLabel is the label name indicating the name of an alert.
alertNameLabel = "alertname"
// alertStateLabel is the label name indicating the state of an alert.
alertStateLabel = "alertstate"
// alertGroupNameLabel defines the label name attached for generated time series.
// attaching this label may be disabled via `-disableAlertgroupLabel` flag.
alertGroupNameLabel = "alertgroup"
)
// alertToTimeSeries converts the given alert with the given timestamp to time series
func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp int64) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
tss = append(tss, alertToTimeSeries(a, timestamp))
if ar.For > 0 {
tss = append(tss, alertForToTimeSeries(a, timestamp))
}
return tss
}
func alertToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertMetricName
labels[alertStateLabel] = a.State.String()
return newTimeSeries([]float64{1}, []int64{timestamp}, labels)
}
// alertForToTimeSeries returns a timeseries that represents
// state of active alerts, where value is time when alert become active
func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertForStateMetricName
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
}
// Restore restores the state of active alerts basing on previously written time series.
// Restore restores only ActiveAt field. Field State will be always Pending and supposed
// to be updated on next Exec, as well as Value field.
// Only rules with For > 0 will be restored.
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
if q == nil {
return fmt.Errorf("querier is nil")
}
ts := time.Now()
qFn := func(query string) ([]datasource.Metric, error) { return ar.q.Query(ctx, query, ts) }
// account for external labels in filter
var labelsFilter string
for k, v := range labels {
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
}
// Get the last data point in range via MetricsQL `last_over_time`.
// We don't use plain PromQL since Prometheus doesn't support
// remote write protocol which is used for state persistence in vmalert.
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
qMetrics, err := q.Query(ctx, expr, ts)
if err != nil {
return err
}
for _, m := range qMetrics {
ls := &labelSet{
origin: make(map[string]string, len(m.Labels)),
processed: make(map[string]string, len(m.Labels)),
}
for _, l := range m.Labels {
if l.Name == "__name__" {
continue
}
ls.origin[l.Name] = l.Value
ls.processed[l.Name] = l.Value
}
a, err := ar.newAlert(m, ls, time.Unix(int64(m.Values[0]), 0), qFn)
if err != nil {
return fmt.Errorf("failed to create alert: %w", err)
}
a.ID = hash(ls.processed)
a.State = notifier.StatePending
a.Restored = true
ar.alerts[a.ID] = a
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
}
return nil
}
// alertsToSend walks through the current alerts of AlertingRule
// and returns only those which should be sent to notifier.
// Isn't concurrent safe.
func (ar *AlertingRule) alertsToSend(ts time.Time, resolveDuration, resendDelay time.Duration) []notifier.Alert {
needsSending := func(a *notifier.Alert) bool {
if a.State == notifier.StatePending {
return false
}
if a.ResolvedAt.After(a.LastSent) {
return true
}
return a.LastSent.Add(resendDelay).Before(ts)
}
var alerts []notifier.Alert
for _, a := range ar.alerts {
if !needsSending(a) {
continue
}
a.End = ts.Add(resolveDuration)
if a.State == notifier.StateInactive {
a.End = a.ResolvedAt
}
a.LastSent = ts
alerts = append(alerts, *a)
}
return alerts
}

View File

@@ -0,0 +1,919 @@
package main
import (
"context"
"errors"
"reflect"
"sort"
"strings"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func TestAlertingRule_ToTimeSeries(t *testing.T) {
timestamp := time.Now()
testCases := []struct {
rule *AlertingRule
alert *notifier.Alert
expTS []prompbmarshal.TimeSeries
}{
{
newTestAlertingRule("instant", 0),
&notifier.Alert{State: notifier.StateFiring},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
}),
},
},
{
newTestAlertingRule("instant extra labels", 0),
&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
"job": "foo",
"instance": "bar",
}},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
"job": "foo",
"instance": "bar",
}),
},
},
{
newTestAlertingRule("instant labels override", 0),
&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
alertStateLabel: "foo",
"__name__": "bar",
}},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
}),
},
},
{
newTestAlertingRule("for", time.Second),
&notifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second)},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
}),
},
},
{
newTestAlertingRule("for pending", 10*time.Second),
&notifier.Alert{State: notifier.StatePending, ActiveAt: timestamp.Add(time.Second)},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StatePending.String(),
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
}),
},
},
}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
tc.rule.alerts[tc.alert.ID] = tc.alert
tss := tc.rule.toTimeSeries(timestamp.Unix())
if err := compareTimeSeries(t, tc.expTS, tss); err != nil {
t.Fatalf("timeseries missmatch: %s", err)
}
})
}
}
func TestAlertingRule_Exec(t *testing.T) {
const defaultStep = 5 * time.Millisecond
type testAlert struct {
labels []string
alert *notifier.Alert
}
testCases := []struct {
rule *AlertingRule
steps [][]datasource.Metric
expAlerts []testAlert
}{
{
newTestAlertingRule("empty", 0),
[][]datasource.Metric{},
nil,
},
{
newTestAlertingRule("empty labels", 0),
[][]datasource.Metric{
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
},
[]testAlert{
{alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
{},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
{},
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("multiple-firing", 0),
[][]datasource.Metric{
{
metricWithLabels(t, "name", "foo"),
metricWithLabels(t, "name", "foo1"),
metricWithLabels(t, "name", "foo2"),
},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("multiple-steps-firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo1")},
{metricWithLabels(t, "name", "foo2")},
},
// 1: fire first alert
// 2: fire second alert, set first inactive
// 3: fire third alert, set second inactive
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("for-pending", time.Minute),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
},
},
{
newTestAlertingRule("for-fired", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("for-pending=>empty", time.Second),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset and delete pending alerts
{},
},
nil,
},
{
newTestAlertingRule("for-pending=>firing=>inactive", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("for-pending=>firing=>inactive=>pending", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
},
},
{
newTestAlertingRule("for-pending=>firing=>inactive=>pending=>firing", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
{},
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
}
fakeGroup := Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
for _, step := range tc.steps {
fq.reset()
fq.add(step...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected err: %s", err)
}
// artificial delay between applying steps
time.Sleep(defaultStep)
}
if len(tc.rule.alerts) != len(tc.expAlerts) {
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
}
expAlerts := make(map[uint64]*notifier.Alert)
for _, ta := range tc.expAlerts {
labels := make(map[string]string)
for i := 0; i < len(ta.labels); i += 2 {
k, v := ta.labels[i], ta.labels[i+1]
labels[k] = v
}
labels[alertNameLabel] = tc.rule.Name
h := hash(labels)
expAlerts[h] = ta.alert
}
for key, exp := range expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("expected to have key %d", key)
}
if got.State != exp.State {
t.Fatalf("expected state %d; got %d", exp.State, got.State)
}
}
})
}
}
func TestAlertingRule_ExecRange(t *testing.T) {
testCases := []struct {
rule *AlertingRule
data []datasource.Metric
expAlerts []*notifier.Alert
}{
{
newTestAlertingRule("empty", 0),
[]datasource.Metric{},
nil,
},
{
newTestAlertingRule("empty labels", 0),
[]datasource.Metric{
{Values: []float64{1}, Timestamps: []int64{1}},
},
[]*notifier.Alert{
{State: notifier.StateFiring},
},
},
{
newTestAlertingRule("single-firing", 0),
[]datasource.Metric{
metricWithLabels(t, "name", "foo"),
},
[]*notifier.Alert{
{
Labels: map[string]string{"name": "foo"},
State: notifier.StateFiring,
},
},
},
{
newTestAlertingRule("single-firing-on-range", 0),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1e3, 2e3, 3e3}},
},
[]*notifier.Alert{
{State: notifier.StateFiring},
{State: notifier.StateFiring},
{State: notifier.StateFiring},
},
},
{
newTestAlertingRule("for-pending", time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
},
[]*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
},
},
{
newTestAlertingRule("for-firing", 3*time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
},
[]*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
},
},
{
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1, 1, 1}, Timestamps: []int64{1, 2, 5, 6, 20}},
},
[]*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
},
},
{
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
},
},
[]*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
//
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
Labels: map[string]string{
"foo": "bar",
}},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
Labels: map[string]string{
"foo": "bar",
}},
},
},
{
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
[]datasource.Metric{
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
},
},
[]*notifier.Alert{
{State: notifier.StateFiring, Labels: map[string]string{
"source": "vm",
}},
{State: notifier.StateFiring, Labels: map[string]string{
"source": "vm",
}},
//
{State: notifier.StateFiring, Labels: map[string]string{
"foo": "bar",
"source": "vm",
}},
{State: notifier.StateFiring, Labels: map[string]string{
"foo": "bar",
"source": "vm",
}},
},
},
}
fakeGroup := Group{Name: "TestRule_ExecRange"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
fq.add(tc.data...)
gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
var expTS []prompbmarshal.TimeSeries
var j int
for _, series := range tc.data {
for _, timestamp := range series.Timestamps {
a := tc.expAlerts[j]
if a.Labels == nil {
a.Labels = make(map[string]string)
}
a.Labels[alertNameLabel] = tc.rule.Name
expTS = append(expTS, tc.rule.alertToTimeSeries(a, timestamp)...)
j++
}
}
if len(gotTS) != len(expTS) {
t.Fatalf("expected %d time series; got %d", len(expTS), len(gotTS))
}
for i := range expTS {
got, exp := gotTS[i], expTS[i]
if !reflect.DeepEqual(got, exp) {
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
}
}
})
}
}
func TestAlertingRule_Restore(t *testing.T) {
testCases := []struct {
rule *AlertingRule
metrics []datasource.Metric
expAlerts map[uint64]*notifier.Alert
}{
{
newTestRuleWithLabels("no extra labels"),
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
),
},
map[uint64]*notifier.Alert{
hash(nil): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(time.Hour)},
},
},
{
newTestRuleWithLabels("metric labels"),
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
alertNameLabel, "metric labels",
alertGroupNameLabel, "groupID",
"foo", "bar",
"namespace", "baz",
),
},
map[uint64]*notifier.Alert{
hash(map[string]string{
alertNameLabel: "metric labels",
alertGroupNameLabel: "groupID",
"foo": "bar",
"namespace": "baz",
}): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(time.Hour)},
},
},
{
newTestRuleWithLabels("rule labels", "source", "vm"),
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
"foo", "bar",
"namespace", "baz",
// extra labels set by rule
"source", "vm",
),
},
map[uint64]*notifier.Alert{
hash(map[string]string{
"foo": "bar",
"namespace": "baz",
"source": "vm",
}): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(time.Hour)},
},
},
{
newTestRuleWithLabels("multiple alerts"),
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
"host", "localhost-1",
),
metricWithValueAndLabels(t, float64(time.Now().Truncate(2*time.Hour).Unix()),
"__name__", alertForStateMetricName,
"host", "localhost-2",
),
metricWithValueAndLabels(t, float64(time.Now().Truncate(3*time.Hour).Unix()),
"__name__", alertForStateMetricName,
"host", "localhost-3",
),
},
map[uint64]*notifier.Alert{
hash(map[string]string{"host": "localhost-1"}): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(time.Hour)},
hash(map[string]string{"host": "localhost-2"}): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(2 * time.Hour)},
hash(map[string]string{"host": "localhost-3"}): {State: notifier.StatePending,
ActiveAt: time.Now().Truncate(3 * time.Hour)},
},
},
}
fakeGroup := Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.GroupID = fakeGroup.ID()
tc.rule.q = fq
fq.add(tc.metrics...)
if err := tc.rule.Restore(context.TODO(), fq, time.Hour, nil); err != nil {
t.Fatalf("unexpected err: %s", err)
}
if len(tc.rule.alerts) != len(tc.expAlerts) {
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
}
for key, exp := range tc.expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("expected to have key %d", key)
}
if got.State != exp.State {
t.Fatalf("expected state %d; got %d", exp.State, got.State)
}
if got.ActiveAt != exp.ActiveAt {
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
}
}
})
}
}
func TestAlertingRule_Exec_Negative(t *testing.T) {
fq := &fakeQuerier{}
ar := newTestAlertingRule("test", 0)
ar.Labels = map[string]string{"job": "test"}
ar.q = fq
// successful attempt
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
_, err := ar.Exec(context.TODO(), time.Now(), 0)
if err != nil {
t.Fatal(err)
}
// label `job` will collide with rule extra label and will make both time series equal
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
_, err = ar.Exec(context.TODO(), time.Now(), 0)
if !errors.Is(err, errDuplicate) {
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
}
fq.reset()
expErr := "connection reset by peer"
fq.setErr(errors.New(expErr))
_, err = ar.Exec(context.TODO(), time.Now(), 0)
if err == nil {
t.Fatalf("expected to get err; got nil")
}
if !strings.Contains(err.Error(), expErr) {
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
}
}
func TestAlertingRuleLimit(t *testing.T) {
fq := &fakeQuerier{}
ar := newTestAlertingRule("test", 0)
ar.Labels = map[string]string{"job": "test"}
ar.q = fq
ar.For = time.Minute
testCases := []struct {
limit int
err string
tssNum int
}{
{
limit: 0,
tssNum: 4,
},
{
limit: -1,
tssNum: 4,
},
{
limit: 1,
err: "exec exceeded limit of 1 with 2 alerts",
tssNum: 0,
},
{
limit: 4,
tssNum: 4,
},
}
var (
err error
timestamp = time.Now()
)
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
for _, testCase := range testCases {
_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
t.Fatal(err)
}
}
fq.reset()
}
func TestAlertingRule_Template(t *testing.T) {
testCases := []struct {
rule *AlertingRule
metrics []datasource.Metric
expAlerts map[uint64]*notifier.Alert
}{
{
newTestRuleWithLabels("common", "region", "east"),
[]datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"),
},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{},
Labels: map[string]string{
alertNameLabel: "common",
"region": "east",
"instance": "foo",
},
},
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "bar"}): {
Annotations: map[string]string{},
Labels: map[string]string{
alertNameLabel: "common",
"region": "east",
"instance": "bar",
},
},
},
},
{
&AlertingRule{
Name: "override label",
Labels: map[string]string{
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
},
[]datasource.Metric{
metricWithValueAndLabels(t, 2, "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "instance", "bar", alertNameLabel, "override"),
},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "override label",
"instance": "foo",
},
Annotations: map[string]string{
"summary": `Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`,
},
},
hash(map[string]string{alertNameLabel: "override label", "instance": "bar"}): {
Labels: map[string]string{
alertNameLabel: "override label",
"instance": "bar",
},
Annotations: map[string]string{
"summary": `Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`,
},
},
},
},
{
&AlertingRule{
Name: "OriginLabels",
GroupName: "Testing",
Labels: map[string]string{
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
},
alerts: make(map[uint64]*notifier.Alert),
},
[]datasource.Metric{
metricWithValueAndLabels(t, 1,
alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname",
"instance", "foo"),
},
map[uint64]*notifier.Alert{
hash(map[string]string{
alertNameLabel: "OriginLabels",
alertGroupNameLabel: "Testing",
"instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "OriginLabels",
alertGroupNameLabel: "Testing",
"instance": "foo",
},
Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
},
},
},
},
}
fakeGroup := Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.GroupID = fakeGroup.ID()
tc.rule.q = fq
fq.add(tc.metrics...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected err: %s", err)
}
for hash, expAlert := range tc.expAlerts {
gotAlert := tc.rule.alerts[hash]
if gotAlert == nil {
t.Fatalf("alert %d is missing; labels: %v; annotations: %v",
hash, expAlert.Labels, expAlert.Annotations)
}
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
}
if !reflect.DeepEqual(expAlert.Labels, gotAlert.Labels) {
t.Fatalf("expected to have labels %#v; got %#v", expAlert.Labels, gotAlert.Labels)
}
}
})
}
}
func TestAlertsToSend(t *testing.T) {
ts := time.Now()
f := func(alerts, expAlerts []*notifier.Alert, resolveDuration, resendDelay time.Duration) {
t.Helper()
ar := &AlertingRule{alerts: make(map[uint64]*notifier.Alert)}
for i, a := range alerts {
ar.alerts[uint64(i)] = a
}
gotAlerts := ar.alertsToSend(ts, resolveDuration, resendDelay)
if gotAlerts == nil && expAlerts == nil {
return
}
if len(gotAlerts) != len(expAlerts) {
t.Fatalf("expected to get %d alerts; got %d instead",
len(expAlerts), len(gotAlerts))
}
sort.Slice(expAlerts, func(i, j int) bool {
return expAlerts[i].Name < expAlerts[j].Name
})
sort.Slice(gotAlerts, func(i, j int) bool {
return gotAlerts[i].Name < gotAlerts[j].Name
})
for i, exp := range expAlerts {
got := gotAlerts[i]
if got.LastSent != exp.LastSent {
t.Fatalf("expected LastSent to be %v; got %v", exp.LastSent, got.LastSent)
}
if got.End != exp.End {
t.Fatalf("expected End to be %v; got %v", exp.End, got.End)
}
}
}
f( // send firing alert with custom resolve time
[]*notifier.Alert{{State: notifier.StateFiring}},
[]*notifier.Alert{{LastSent: ts, End: ts.Add(5 * time.Minute)}},
5*time.Minute, time.Minute,
)
f( // resolve inactive alert at the current timestamp
[]*notifier.Alert{{State: notifier.StateInactive, ResolvedAt: ts}},
[]*notifier.Alert{{LastSent: ts, End: ts}},
time.Minute, time.Minute,
)
f( // mixed case of firing and resolved alerts. Names are added for deterministic sorting
[]*notifier.Alert{{Name: "a", State: notifier.StateFiring}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
[]*notifier.Alert{{Name: "a", LastSent: ts, End: ts.Add(5 * time.Minute)}, {Name: "b", LastSent: ts, End: ts}},
5*time.Minute, time.Minute,
)
f( // mixed case of pending and resolved alerts. Names are added for deterministic sorting
[]*notifier.Alert{{Name: "a", State: notifier.StatePending}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
[]*notifier.Alert{{Name: "b", LastSent: ts, End: ts}},
5*time.Minute, time.Minute,
)
f( // attempt to send alert that was already sent in the resendDelay interval
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
nil,
time.Minute, time.Minute,
)
f( // attempt to send alert that was sent out of the resendDelay interval
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-2 * time.Minute)}},
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
time.Minute, time.Minute,
)
f( // alert must be sent even if resendDelay interval is 0
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
time.Minute, 0,
)
f( // inactive alert which has been sent already
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts.Add(-2 * time.Second)}},
nil,
time.Minute, time.Minute,
)
f( // inactive alert which has been resolved after last send
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts}},
[]*notifier.Alert{{LastSent: ts, End: ts}},
time.Minute, time.Minute,
)
}
func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
r := newTestAlertingRule(name, 0)
r.Labels = make(map[string]string)
for i := 0; i < len(labels); i += 2 {
r.Labels[labels[i]] = labels[i+1]
}
return r
}
func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
return &AlertingRule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor, EvalInterval: waitFor}
}

View File

@@ -1,39 +1,40 @@
package config
import (
"bytes"
"flag"
"crypto/md5"
"fmt"
"hash/fnv"
"io"
"io/ioutil"
"net/url"
"path/filepath"
"sort"
"strings"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/log"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
var defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/victoriametrics/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
// Group contains list of Rules grouped into
// entity with one name and evaluation interval
type Group struct {
Type Type `yaml:"type,omitempty"`
File string
Name string `yaml:"name"`
Interval *promutil.Duration `yaml:"interval,omitempty"`
EvalOffset *promutil.Duration `yaml:"eval_offset,omitempty"`
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
Type datasource.Type `yaml:"type,omitempty"`
File string
Name string `yaml:"name"`
Interval *promutils.Duration `yaml:"interval,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
// ExtraFilterLabels is a list label filters applied to every rule
// request withing a group. Is compatible only with VM datasources.
// See https://docs.victoriametrics.com#prometheus-querying-api-enhancements
// DEPRECATED: use Params field instead
ExtraFilterLabels map[string]string `yaml:"extra_filter_labels"`
// Labels is a set of label value pairs, that will be added to every rule.
// It has priority over the external labels.
Labels map[string]string `yaml:"labels"`
@@ -42,20 +43,13 @@ type Group struct {
Checksum string
// Optional HTTP URL parameters added to each rule request
Params url.Values `yaml:"params"`
// Headers contains optional HTTP headers added to each rule request
Headers []Header `yaml:"headers,omitempty"`
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
// EvalAlignment will make the timestamp of group query requests be aligned with interval
EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
// Debug enables debug logs for the group
Debug bool `yaml:"debug,omitempty"`
// Catches all undefined fields and must be empty after parsing.
XXX map[string]any `yaml:",inline"`
XXX map[string]interface{} `yaml:",inline"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
type group Group
if err := unmarshal((*group)(g)); err != nil {
return err
@@ -64,36 +58,38 @@ func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
if err != nil {
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
}
// change default value to prometheus datasource.
if g.Type.Get() == "" {
g.Type = NewRawType(*defaultRuleType)
g.Type.Set(datasource.NewPrometheusType())
}
h := fnv.New64a()
// backward compatibility with deprecated `ExtraFilterLabels` param
if len(g.ExtraFilterLabels) > 0 {
if g.Params == nil {
g.Params = url.Values{}
}
// Sort extraFilters for consistent order for query args across runs.
extraFilters := make([]string, 0, len(g.ExtraFilterLabels))
for k, v := range g.ExtraFilterLabels {
extraFilters = append(extraFilters, fmt.Sprintf("%s=%s", k, v))
}
sort.Strings(extraFilters)
for _, extraFilter := range extraFilters {
g.Params.Add("extra_label", extraFilter)
}
}
h := md5.New()
h.Write(b)
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))
return nil
}
// Validate checks configuration errors for group and internal rules
func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool) error {
// Validate check for internal Group or Rule configuration errors
func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
if g.Name == "" {
return fmt.Errorf("group name must be set")
}
if g.Interval.Duration() < 0 {
return fmt.Errorf("interval shouldn't be lower than 0")
}
// if `eval_offset` is set, the group interval must be specified explicitly(instead of inherited from global evaluationInterval flag) and must bigger than offset.
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
}
if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay")
}
if g.Limit != nil && *g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
}
if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
}
uniqueRules := map[uint64]struct{}{}
for _, r := range g.Rules {
@@ -102,26 +98,26 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
ruleName = r.Alert
}
if _, ok := uniqueRules[r.ID]; ok {
return fmt.Errorf("%q is a duplicate in group", r.String())
return fmt.Errorf("rule %q duplicate", ruleName)
}
uniqueRules[r.ID] = struct{}{}
if err := r.Validate(); err != nil {
return fmt.Errorf("invalid rule %q: %w", ruleName, err)
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
}
if validateExpressions {
// its needed only for tests.
// because correct types must be inherited after unmarshalling.
exprValidator := g.Type.ValidateExpr
if err := exprValidator(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
}
}
if validateTplFn != nil {
if err := validateTplFn(r.Annotations); err != nil {
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
if validateAnnotations {
if err := notifier.ValidateTemplates(r.Annotations); err != nil {
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
}
if err := validateTplFn(r.Labels); err != nil {
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
if err := notifier.ValidateTemplates(r.Labels); err != nil {
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
}
}
}
@@ -131,26 +127,20 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
// Rule describes entity that represent either
// recording rule or alerting rule.
type Rule struct {
ID uint64
Record string `yaml:"record,omitempty"`
Alert string `yaml:"alert,omitempty"`
Expr string `yaml:"expr"`
For *promutil.Duration `yaml:"for,omitempty"`
// Alert will continue firing for this long even when the alerting expression no longer has results.
KeepFiringFor *promutil.Duration `yaml:"keep_firing_for,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Annotations map[string]string `yaml:"annotations,omitempty"`
Debug *bool `yaml:"debug,omitempty"`
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
// Overrides `-rule.updateEntriesLimit`.
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
ID uint64
Record string `yaml:"record,omitempty"`
Alert string `yaml:"alert,omitempty"`
Expr string `yaml:"expr"`
For *promutils.Duration `yaml:"for,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Annotations map[string]string `yaml:"annotations,omitempty"`
// Catches all undefined fields and must be empty after parsing.
XXX map[string]any `yaml:",inline"`
XXX map[string]interface{} `yaml:",inline"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (r *Rule) UnmarshalYAML(unmarshal func(any) error) error {
func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
type rule Rule
if err := unmarshal((*rule)(r)); err != nil {
return err
@@ -167,32 +157,6 @@ func (r *Rule) Name() string {
return r.Alert
}
// String implements Stringer interface
func (r *Rule) String() string {
ruleType := "recording"
if r.Alert != "" {
ruleType = "alerting"
}
b := strings.Builder{}
b.WriteString(fmt.Sprintf("%s rule %q", ruleType, r.Name()))
b.WriteString(fmt.Sprintf("; expr: %q", r.Expr))
kv := sortMap(r.Labels)
for i := range kv {
if i == 0 {
b.WriteString("; labels:")
}
b.WriteString(" ")
b.WriteString(kv[i].key)
b.WriteString("=")
b.WriteString(kv[i].value)
if i < len(kv)-1 {
b.WriteString(",")
}
}
return b.String()
}
// HashRule hashes significant Rule fields into
// unique hash that supposed to define Rule uniqueness
func HashRule(r Rule) uint64 {
@@ -222,60 +186,31 @@ func (r *Rule) Validate() error {
if r.Expr == "" {
return fmt.Errorf("expression can't be empty")
}
if _, ok := r.Labels["__name__"]; ok {
return fmt.Errorf("invalid rule label __name__")
}
return checkOverflow(r.XXX, "rule")
}
// ValidateTplFn must validate the given annotations
type ValidateTplFn func(annotations map[string]string) error
// cLogger is a logger with support of logs suppressing.
// it is used when logs emitted by config package needs
// to be suppressed.
var cLogger = &log.Logger{}
// ParseSilent parses rule configs from given file patterns without emitting logs
func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
cLogger.Suppress(true)
defer cLogger.Suppress(false)
files, err := ReadFromFS(pathPatterns)
if err != nil {
return nil, fmt.Errorf("failed to read from the config: %w", err)
}
return parse(files, validateTplFn, validateExpressions)
}
// Parse parses rule configs from given file patterns
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
files, err := ReadFromFS(pathPatterns)
if err != nil {
return nil, fmt.Errorf("failed to read from the config: %w", err)
func Parse(pathPatterns []string, validateAnnotations, validateExpressions bool) ([]Group, error) {
var fp []string
for _, pattern := range pathPatterns {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
}
fp = append(fp, matches...)
}
groups, err := parse(files, validateTplFn, validateExpressions)
if err != nil {
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
}
if len(groups) < 1 {
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
}
return groups, nil
}
func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
errGroup := new(vmalertutil.ErrGroup)
errGroup := new(utils.ErrGroup)
var isExtraFilterLabelsUsed bool
var groups []Group
for file, data := range files {
for _, file := range fp {
uniqueGroups := map[string]struct{}{}
gr, err := parseConfig(data)
gr, err := parseFile(file)
if err != nil {
errGroup.Add(fmt.Errorf("failed to parse file %q: %w", file, err))
continue
}
for _, g := range gr {
if err := g.Validate(validateTplFn, validateExpressions); err != nil {
if err := g.Validate(validateAnnotations, validateExpressions); err != nil {
errGroup.Add(fmt.Errorf("invalid group %q in file %q: %w", g.Name, file, err))
continue
}
@@ -285,44 +220,43 @@ func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpress
}
uniqueGroups[g.Name] = struct{}{}
g.File = file
if len(g.ExtraFilterLabels) > 0 {
isExtraFilterLabelsUsed = true
}
groups = append(groups, g)
}
}
if err := errGroup.Err(); err != nil {
return nil, err
}
if len(groups) < 1 {
logger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
}
if isExtraFilterLabelsUsed {
logger.Warnf("field `extra_filter_labels` is deprecated - use `params` instead")
}
return groups, nil
}
func parseConfig(data []byte) ([]Group, error) {
data = envtemplate.ReplaceBytes(data)
var result []Group
type cfgFile struct {
func parseFile(path string) ([]Group, error) {
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("error reading alert rule file: %w", err)
}
data = envtemplate.Replace(data)
g := struct {
Groups []Group `yaml:"groups"`
// Catches all undefined fields and must be empty after parsing.
XXX map[string]any `yaml:",inline"`
XXX map[string]interface{} `yaml:",inline"`
}{}
err = yaml.Unmarshal(data, &g)
if err != nil {
return nil, err
}
decoder := yaml.NewDecoder(bytes.NewReader(data))
for {
var cf cfgFile
if err := decoder.Decode(&cf); err != nil {
if err == io.EOF { // EOF indicates no more documents to read
break
}
return nil, err
}
if err := checkOverflow(cf.XXX, "config"); err != nil {
return nil, err
}
result = append(result, cf.Groups...)
}
return result, nil
return g.Groups, checkOverflow(g.XXX, "config")
}
func checkOverflow(m map[string]any, ctx string) error {
func checkOverflow(m map[string]interface{}, ctx string) error {
if len(m) > 0 {
var keys []string
for k := range m {

View File

@@ -1,504 +1,398 @@
package config
import (
"net/http"
"net/http/httptest"
"net/url"
"os"
"strings"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
func TestMain(m *testing.M) {
if err := templates.Load([]string{"testdata/templates/*good.tmpl"}, url.URL{}); err != nil {
if err := templates.Load([]string{"testdata/templates/*good.tmpl"}, true); err != nil {
os.Exit(1)
}
os.Exit(m.Run())
}
func TestParseFromURL(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/bad", func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte("foo bar"))
})
mux.HandleFunc("/good-alert", func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`
groups:
- name: TestGroup
rules:
- alert: Conns
expr: vm_tcplistener_conns > 0`))
})
mux.HandleFunc("/good-rr", func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`
groups:
- name: TestGroup
rules:
- record: conns
expr: max(vm_tcplistener_conns)`))
})
mux.HandleFunc("/good-multi-doc", func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`
groups:
- name: foo
rules:
- record: conns
expr: max(vm_tcplistener_conns)
---
groups:
- name: bar
rules:
- record: conns
expr: max(vm_tcplistener_conns)`))
})
mux.HandleFunc("/bad-multi-doc", func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`
bad_field:
- name: foo
rules:
- record: conns
expr: max(vm_tcplistener_conns)
---
groups:
- name: bar
rules:
- record: conns
expr: max(vm_tcplistener_conns)`))
})
srv := httptest.NewServer(mux)
defer srv.Close()
f := func(urls []string, expErr bool) {
for i, u := range urls {
urls[i] = srv.URL + u
}
_, err := Parse(urls, notifier.ValidateTemplates, true)
if err != nil && !expErr {
t.Fatalf("error parsing URLs %s", err)
}
if err == nil && expErr {
t.Fatalf("expecting error parsing URLs but got none")
}
}
f([]string{"/good-alert", "/good-rr", "/good-multi-doc"}, false)
f([]string{"/bad"}, true)
f([]string{"/bad-multi-doc"}, true)
f([]string{"/good-alert", "/bad"}, true)
}
func TestParse_Success(t *testing.T) {
_, err := Parse([]string{"testdata/rules/*good.rules", "testdata/dir/*good.*"}, notifier.ValidateTemplates, true)
if err != nil {
t.Fatalf("error parsing files %s", err)
func TestParseGood(t *testing.T) {
if _, err := Parse([]string{"testdata/rules/*good.rules", "testdata/dir/*good.*"}, true, true); err != nil {
t.Errorf("error parsing files %s", err)
}
}
func TestParse_Failure(t *testing.T) {
f := func(paths []string, errStrExpected string) {
t.Helper()
_, err := Parse(paths, notifier.ValidateTemplates, true)
func TestParseBad(t *testing.T) {
testCases := []struct {
path []string
expErr string
}{
{
[]string{"testdata/rules/rules0-bad.rules"},
"unexpected token",
},
{
[]string{"testdata/dir/rules0-bad.rules"},
"error parsing annotation",
},
{
[]string{"testdata/dir/rules1-bad.rules"},
"duplicate in file",
},
{
[]string{"testdata/dir/rules2-bad.rules"},
"function \"unknown\" not defined",
},
{
[]string{"testdata/dir/rules3-bad.rules"},
"either `record` or `alert` must be set",
},
{
[]string{"testdata/dir/rules4-bad.rules"},
"either `record` or `alert` must be set",
},
{
[]string{"testdata/rules/rules1-bad.rules"},
"bad graphite expr",
},
}
for _, tc := range testCases {
_, err := Parse(tc.path, true, true)
if err == nil {
t.Fatalf("expected to get error")
t.Errorf("expected to get error")
return
}
if !strings.Contains(err.Error(), errStrExpected) {
t.Fatalf("expected err to contain %q; got %q instead", errStrExpected, err)
if !strings.Contains(err.Error(), tc.expErr) {
t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
}
}
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad GraphiteQL expr")
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
f([]string{"testdata/rules/rules-multi-doc-duplicates-bad.rules"}, "duplicate")
f([]string{"http://unreachable-url"}, "failed to")
}
func TestRuleValidate(t *testing.T) {
func TestRule_Validate(t *testing.T) {
if err := (&Rule{}).Validate(); err == nil {
t.Fatalf("expected empty name error")
t.Errorf("expected empty name error")
}
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
t.Fatalf("expected empty expr error")
}
if err := (&Rule{Record: "record", Expr: "sum(test)", Labels: map[string]string{"__name__": "test"}}).Validate(); err == nil {
t.Fatalf("invalid rule label; got %s", err)
t.Errorf("expected empty expr error")
}
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
t.Fatalf("expected valid rule; got %s", err)
t.Errorf("expected valid rule; got %s", err)
}
}
func TestGroupValidate_Failure(t *testing.T) {
f := func(group *Group, validateExpressions bool, errStrExpected string) {
t.Helper()
err := group.Validate(nil, validateExpressions)
if err == nil {
t.Fatalf("expecting non-nil error")
}
errStr := err.Error()
if !strings.Contains(errStr, errStrExpected) {
t.Fatalf("missing %q in the returned error %q", errStrExpected, errStr)
}
}
f(&Group{}, false, "group name must be set")
f(&Group{
Name: "both record and alert are not set",
Rules: []Rule{
{
Expr: "sum(up == 0 ) by (host)",
For: promutil.NewDuration(10 * time.Millisecond),
},
{
Expr: "sumSeries(time('foo.bar',10))",
},
func TestGroup_Validate(t *testing.T) {
testCases := []struct {
group *Group
rules []Rule
validateAnnotations bool
validateExpressions bool
expErr string
}{
{
group: &Group{},
expErr: "group name must be set",
},
}, false, "invalid rule")
f(&Group{
Name: "negative interval",
Interval: promutil.NewDuration(-1),
}, false, "interval shouldn't be lower than 0")
f(&Group{
Name: "too big eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
f(&Group{
Name: "too big negative eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(-2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{
Name: "wrong limit",
Limit: &limit,
}, false, "invalid limit")
f(&Group{
Name: "wrong concurrency",
Concurrency: -1,
}, false, "invalid concurrency")
f(&Group{
Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
},
{
Alert: "alert",
Expr: "up == 1",
},
},
}, false, "duplicate")
f(&Group{
Name: "test",
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
}, false, "duplicate")
f(&Group{
Name: "test",
Rules: []Rule{
{Record: "record", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Record: "record", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
}, false, "duplicate")
f(&Group{
Name: "test",
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
}, false, "duplicate")
f(&Group{
Name: "test",
Rules: []Rule{
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
}, false, "duplicate")
f(&Group{
Name: "test thanos",
Type: NewRawType("thanos"),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
}, true, "unknown datasource type")
// validate expressions
f(&Group{
Name: "test prometheus expr",
Type: NewPrometheusType(),
Rules: []Rule{
{
Record: "record",
Expr: "up | 0",
},
},
}, true, "bad MetricsQL expr")
f(&Group{
Name: "test graphite expr",
Type: NewGraphiteType(),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "some-description",
}},
},
}, true, "bad GraphiteQL expr")
f(&Group{
Name: "test vlogs expr",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: "stats count(*) as requests"},
},
}, true, "bad LogsQL expr")
f(&Group{
Name: "test vlogs expr",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: "_time: 1m | stats by (path, _time: 1m) count(*) as requests"},
},
}, true, "bad LogsQL expr")
f(&Group{
Name: "test graphite with prometheus expr",
Type: NewGraphiteType(),
Rules: []Rule{
{
Record: "r1",
ID: 1,
Expr: "sumSeries(time('foo.bar',10))",
For: promutil.NewDuration(10 * time.Millisecond),
},
{
Record: "r2",
ID: 2,
Expr: "sum(up == 0 ) by (host)",
},
},
}, true, "bad GraphiteQL expr")
f(&Group{
Name: "test vlogs with prometheus exp",
Type: NewVLogsType(),
Rules: []Rule{
{
Record: "r1",
Expr: "sum(up == 0 ) by (host)",
For: promutil.NewDuration(10 * time.Millisecond),
},
},
}, true, "bad LogsQL expr")
f(&Group{
Name: "test prometheus with vlogs exp",
Type: NewPrometheusType(),
Rules: []Rule{
{
Record: "r1",
Expr: "* | stats by (path) count()",
For: promutil.NewDuration(10 * time.Millisecond),
},
},
}, true, "bad MetricsQL expr")
}
func TestGroupValidate_Success(t *testing.T) {
f := func(group *Group, validateAnnotations, validateExpressions bool) {
t.Helper()
var validateTplFn ValidateTplFn
if validateAnnotations {
validateTplFn = notifier.ValidateTemplates
}
err := group.Validate(validateTplFn, validateExpressions)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
}
f(&Group{
Name: "test",
Rules: []Rule{
{
Record: "record",
Expr: "up | 0",
},
},
}, false, false)
f(&Group{
Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
Labels: map[string]string{
"summary": "{{ value|query }}",
{
group: &Group{Name: "test",
Rules: []Rule{
{
Record: "record",
Expr: "up | 0",
},
},
},
expErr: "",
},
}, false, false)
// validate annotations
f(&Group{
Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
Labels: map[string]string{
"summary": `
{
group: &Group{Name: "test",
Rules: []Rule{
{
Record: "record",
Expr: "up | 0",
},
},
},
expErr: "invalid expression",
validateExpressions: true,
},
{
group: &Group{Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
Labels: map[string]string{
"summary": "{{ value|query }}",
},
},
},
},
expErr: "",
},
{
group: &Group{Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
Labels: map[string]string{
"summary": `
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
{{ . | first | value | humanize1024 }}B
{{ end }}`,
},
},
},
},
validateAnnotations: true,
},
{
group: &Group{Name: "test",
Rules: []Rule{
{
Alert: "alert",
Expr: "up == 1",
},
{
Alert: "alert",
Expr: "up == 1",
},
},
},
expErr: "duplicate",
},
{
group: &Group{Name: "test",
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
},
expErr: "duplicate",
},
{
group: &Group{Name: "test",
Rules: []Rule{
{Record: "record", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Record: "record", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
},
expErr: "duplicate",
},
{
group: &Group{Name: "test",
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
},
expErr: "",
},
{
group: &Group{Name: "test",
Rules: []Rule{
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"summary": "{{ value|query }}",
}},
},
},
expErr: "",
},
{
group: &Group{Name: "test thanos",
Type: datasource.NewRawType("thanos"),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
},
validateExpressions: true,
expErr: "unknown datasource type",
},
{
group: &Group{Name: "test graphite",
Type: datasource.NewGraphiteType(),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "some-description",
}},
},
},
validateExpressions: true,
expErr: "",
},
{
group: &Group{Name: "test prometheus",
Type: datasource.NewPrometheusType(),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
},
validateExpressions: true,
expErr: "",
},
{
group: &Group{
Name: "test graphite inherit",
Type: datasource.NewGraphiteType(),
Rules: []Rule{
{
Expr: "sumSeries(time('foo.bar',10))",
For: promutils.NewDuration(10 * time.Millisecond),
},
{
Expr: "sum(up == 0 ) by (host)",
},
},
},
},
}, true, false)
// validate expressions
f(&Group{
Name: "test prometheus",
Type: NewPrometheusType(),
Rules: []Rule{
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"description": "{{ value|query }}",
}},
{
group: &Group{
Name: "test graphite prometheus bad expr",
Type: datasource.NewGraphiteType(),
Rules: []Rule{
{
Expr: "sum(up == 0 ) by (host)",
For: promutils.NewDuration(10 * time.Millisecond),
},
{
Expr: "sumSeries(time('foo.bar',10))",
},
},
},
expErr: "invalid rule",
},
}, false, true)
f(&Group{
Name: "test victorialogs",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: " _time: 1m | stats count(*) as requests", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
}, false, true)
}
func TestHashRule_NotEqual(t *testing.T) {
f := func(a, b Rule) {
t.Helper()
aID, bID := HashRule(a), HashRule(b)
if aID == bID {
t.Fatalf("rule hashes mustn't be equal; got %d", aID)
}
for _, tc := range testCases {
err := tc.group.Validate(tc.validateAnnotations, tc.validateExpressions)
if err == nil {
if tc.expErr != "" {
t.Errorf("expected to get err %q; got nil insted", tc.expErr)
}
continue
}
if !strings.Contains(err.Error(), tc.expErr) {
t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
}
}
f(Rule{Alert: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 1"})
f(Rule{Record: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 2"})
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
"foo": "baz",
}})
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
}})
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}}, Rule{Alert: "alert", Expr: "up == 1"})
}
func TestHashRule_Equal(t *testing.T) {
f := func(a, b Rule) {
t.Helper()
aID, bID := HashRule(a), HashRule(b)
if aID != bID {
t.Fatalf("rule hashes must be equal; got %d and %d", aID, bID)
func TestHashRule(t *testing.T) {
testCases := []struct {
a, b Rule
equal bool
}{
{
Rule{Record: "record", Expr: "up == 1"},
Rule{Record: "record", Expr: "up == 1"},
true,
},
{
Rule{Alert: "alert", Expr: "up == 1"},
Rule{Alert: "alert", Expr: "up == 1"},
true,
},
{
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
true,
},
{
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
"foo": "bar",
}},
true,
},
{
Rule{Alert: "record", Expr: "up == 1"},
Rule{Alert: "record", Expr: "up == 1"},
true,
},
{
Rule{Alert: "alert", Expr: "up == 1", For: promutils.NewDuration(time.Minute)},
Rule{Alert: "alert", Expr: "up == 1"},
true,
},
{
Rule{Alert: "record", Expr: "up == 1"},
Rule{Record: "record", Expr: "up == 1"},
false,
},
{
Rule{Record: "record", Expr: "up == 1"},
Rule{Record: "record", Expr: "up == 2"},
false,
},
{
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
"foo": "baz",
}},
false,
},
{
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
}},
false,
},
{
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}},
Rule{Alert: "alert", Expr: "up == 1"},
false,
},
}
for i, tc := range testCases {
aID, bID := HashRule(tc.a), HashRule(tc.b)
if tc.equal != (aID == bID) {
t.Fatalf("missmatch for rule %d", i)
}
}
f(Rule{Record: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 1"})
f(Rule{Alert: "alert", Expr: "up == 1"}, Rule{Alert: "alert", Expr: "up == 1"})
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}})
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"foo": "bar",
"baz": "foo",
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
"baz": "foo",
"foo": "bar",
}})
f(Rule{Alert: "record", Expr: "up == 1"}, Rule{Alert: "record", Expr: "up == 1"})
f(Rule{
Alert: "alert", Expr: "up == 1", For: promutil.NewDuration(time.Minute), KeepFiringFor: promutil.NewDuration(time.Minute),
}, Rule{Alert: "alert", Expr: "up == 1"})
}
func TestGroupChecksum(t *testing.T) {
@@ -614,71 +508,6 @@ limit: 10
rules:
- alert: foo
expr: sum by(job) (up == 1)
`)
})
t.Run("`headers` change", func(t *testing.T) {
f(t, `
name: TestGroup
headers:
- "TenantID: foo"
rules:
- alert: foo
expr: sum by(job) (up == 1)
`, `
name: TestGroup
headers:
- "TenantID: bar"
rules:
- alert: foo
expr: sum by(job) (up == 1)
`)
})
t.Run("`notifier_headers` change", func(t *testing.T) {
f(t, `
name: TestGroup
notifier_headers:
- "TenantID: foo"
rules:
- alert: foo
expr: sum by(job) (up == 1)
`, `
name: TestGroup
notifier_headers:
- "TenantID: bar"
rules:
- alert: foo
expr: sum by(job) (up == 1)
`)
})
t.Run("`debug` change", func(t *testing.T) {
f(t, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
`, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
debug: true
`)
})
t.Run("`update_entries_limit` change", func(t *testing.T) {
f(t, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
`, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
update_entries_limit: 33
`)
})
}
@@ -716,4 +545,30 @@ rules:
expr: sum by(job) (up == 1)
`, url.Values{"nocache": {"1"}, "denyPartialResponse": {"true"}})
})
t.Run("extra labels", func(t *testing.T) {
f(t, `
name: TestGroup
extra_filter_labels:
job: victoriametrics
env: prod
rules:
- alert: ExampleAlertAlwaysFiring
expr: sum by(job) (up == 1)
`, url.Values{"extra_label": {"env=prod", "job=victoriametrics"}})
})
t.Run("extra labels and params", func(t *testing.T) {
f(t, `
name: TestGroup
extra_filter_labels:
job: victoriametrics
params:
nocache: ["1"]
extra_label: ["env=prod"]
rules:
- alert: ExampleAlertAlwaysFiring
expr: sum by(job) (up == 1)
`, url.Values{"nocache": {"1"}, "extra_label": {"env=prod", "job=victoriametrics"}})
})
}

View File

@@ -1,111 +0,0 @@
package config
import (
"fmt"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fsurl"
)
// FS represent a file system abstract for reading files.
type FS interface {
// Init initializes FS.
Init() error
// String must return human-readable representation of FS.
String() string
// List returns the list of file names which will be read via Read fn
List() ([]string, error)
// Read returns a list of read files in form of a map
// where key is a file name and value is a content of read file.
// Read must be called only after the successful Init call.
Read(files []string) (map[string][]byte, error)
}
var (
fsRegistryMu sync.Mutex
fsRegistry = make(map[string]FS)
)
// ReadFromFS parses the given path list and inits FS for each item.
// Once initialed, ReadFromFS will try to read and return files from each FS.
// ReadFromFS returns an error if at least one FS failed to init.
// The function can be called multiple times but each unique path
// will be initialed only once.
//
// It is allowed to mix different FS types in path list.
func ReadFromFS(paths []string) (map[string][]byte, error) {
var err error
result := make(map[string][]byte)
for _, path := range paths {
fsRegistryMu.Lock()
fs, ok := fsRegistry[path]
if !ok {
fs, err = newFS(path)
if err != nil {
fsRegistryMu.Unlock()
return nil, fmt.Errorf("error while parsing path %q: %w", path, err)
}
if err := fs.Init(); err != nil {
fsRegistryMu.Unlock()
return nil, fmt.Errorf("error while initializing path %q: %w", path, err)
}
fsRegistry[path] = fs
}
fsRegistryMu.Unlock()
list, err := fs.List()
if err != nil {
return nil, fmt.Errorf("failed to list files from %q", fs)
}
cLogger.Infof("found %d files to read from %q", len(list), fs)
if len(list) < 1 {
continue
}
ts := time.Now()
files, err := fs.Read(list)
if err != nil {
return nil, fmt.Errorf("error while reading files from %q: %w", fs, err)
}
cLogger.Infof("finished reading %d files in %v from %q", len(list), time.Since(ts), fs)
for k, v := range files {
if _, ok := result[k]; ok {
return nil, fmt.Errorf("duplicate found for file name %q: file names must be unique", k)
}
result[k] = v
}
}
return result, nil
}
// newFS creates FS based on the give path.
// Supported file systems are: fs
func newFS(originPath string) (FS, error) {
scheme := "fs"
path := originPath
n := strings.Index(path, "://")
if n >= 0 {
scheme = path[:n]
path = path[n+len("://"):]
}
if len(path) == 0 {
return nil, fmt.Errorf("path cannot be empty")
}
switch scheme {
case "fs":
return &fslocal.FS{Pattern: path}, nil
case "http", "https":
return &fsurl.FS{Path: originPath}, nil
default:
return nil, fmt.Errorf("unsupported scheme %q", scheme)
}
}

View File

@@ -1,39 +0,0 @@
package config
import (
"strings"
"testing"
)
func TestNewFS(t *testing.T) {
f := func(path, expStr string) {
t.Helper()
fs, err := newFS(path)
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
if fs.String() != expStr {
t.Fatalf("expected FS %q; got %q", expStr, fs.String())
}
}
f("/foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
f("fs:///foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
}
func TestNewFSNegative(t *testing.T) {
f := func(path, expErr string) {
t.Helper()
_, err := newFS(path)
if err == nil {
t.Fatalf("expected to have err: %s", expErr)
}
if !strings.Contains(err.Error(), expErr) {
t.Fatalf("expected to have err %q; got %q instead", expErr, err)
}
}
f("", "path cannot be empty")
f("fs://", "path cannot be empty")
f("foobar://baz", `unsupported scheme "foobar"`)
}

View File

@@ -1,50 +0,0 @@
package fslocal
import (
"fmt"
"os"
"github.com/bmatcuk/doublestar/v4"
)
// FS represents a local file system
type FS struct {
// Pattern is used for matching one or multiple files.
// The pattern may describe hierarchical names such as
// /usr/*/bin/ed (assuming the Separator is '/').
Pattern string
}
// Init verifies that configured Pattern is correct
func (fs *FS) Init() error {
_, err := doublestar.FilepathGlob(fs.Pattern)
return err
}
// String implements Stringer interface
func (fs *FS) String() string {
return fmt.Sprintf("Local FS{MatchPattern: %q}", fs.Pattern)
}
// List returns the list of file names which will be read via Read fn
func (fs *FS) List() ([]string, error) {
matches, err := doublestar.FilepathGlob(fs.Pattern)
if err != nil {
return nil, fmt.Errorf("error while matching files via pattern %s: %w", fs.Pattern, err)
}
return matches, nil
}
// Read returns a map of read files where
// key is the file name and value is file's content.
func (fs *FS) Read(files []string) (map[string][]byte, error) {
result := make(map[string][]byte)
for _, path := range files {
data, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("error while reading file %q: %w", path, err)
}
result[path] = data
}
return result, nil
}

View File

@@ -1,57 +0,0 @@
package fsurl
import (
"fmt"
"io"
"net/http"
"net/url"
)
// FS represents a struct which can read content from URL Path
type FS struct {
// Path defines the URL to read the data from
Path string
}
// Init verifies that configured Path is correct
func (fs *FS) Init() error {
_, err := url.Parse(fs.Path)
return err
}
// String implements Stringer interface
func (fs *FS) String() string {
return fmt.Sprintf("URL {Path: %q}", fs.Path)
}
// List returns the list of file names which will be read via Read fn
// List isn't supported by FS and reads from Path only
func (fs *FS) List() ([]string, error) {
return []string{fs.Path}, nil
}
// Read returns a map of read files where
// key is the file name and value is file's content.
func (fs *FS) Read(files []string) (map[string][]byte, error) {
result := make(map[string][]byte)
for _, path := range files {
resp, err := http.Get(path)
if err != nil {
return nil, fmt.Errorf("failed to read from %q: %w", path, err)
}
data, err := io.ReadAll(resp.Body)
_ = resp.Body.Close()
if resp.StatusCode != http.StatusOK {
if len(data) > 4*1024 {
data = data[:4*1024]
}
return nil, fmt.Errorf("unexpected status code when fetching %q: %d, expecting %d; response: %q",
path, resp.StatusCode, http.StatusOK, data)
}
if err != nil {
return nil, fmt.Errorf("cannot read %q: %w", path, err)
}
result[path] = data
}
return result, nil
}

View File

@@ -1,59 +0,0 @@
package log
import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// Logger is using lib/logger for logging
// but can be suppressed via Suppress method
type Logger struct {
mu sync.RWMutex
disabled bool
}
// Suppress whether to ignore message logging.
// Once suppressed, logging continues to be ignored
// until logger is un-suppressed.
func (l *Logger) Suppress(v bool) {
l.mu.Lock()
l.disabled = v
l.mu.Unlock()
}
func (l *Logger) isDisabled() bool {
l.mu.RLock()
defer l.mu.RUnlock()
return l.disabled
}
// Errorf logs error message.
func (l *Logger) Errorf(format string, args ...any) {
if l.isDisabled() {
return
}
logger.Errorf(format, args...)
}
// Warnf logs warning message.
func (l *Logger) Warnf(format string, args ...any) {
if l.isDisabled() {
return
}
logger.Warnf(format, args...)
}
// Infof logs info message.
func (l *Logger) Infof(format string, args ...any) {
if l.isDisabled() {
return
}
logger.Infof(format, args...)
}
// Panicf logs panic message and panics.
// Panicf can't be suppressed
func (l *Logger) Panicf(format string, args ...any) {
logger.Panicf(format, args...)
}

View File

@@ -1,54 +0,0 @@
package log
import (
"bytes"
"fmt"
"strings"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
func TestOutput(t *testing.T) {
testOutput := &bytes.Buffer{}
logger.SetOutputForTests(testOutput)
defer logger.ResetOutputForTest()
log := &Logger{}
mustMatch := func(exp string) {
t.Helper()
if exp == "" {
if testOutput.String() != "" {
t.Fatalf("expected output to be empty; got %q", testOutput.String())
}
}
if !strings.Contains(testOutput.String(), exp) {
t.Fatalf("output %q should contain %q", testOutput.String(), exp)
}
fmt.Println(testOutput.String())
testOutput.Reset()
}
log.Warnf("foo")
mustMatch("foo")
log.Infof("info %d", 2)
mustMatch("info 2")
log.Errorf("error %s %d", "baz", 5)
mustMatch("error baz 5")
log.Suppress(true)
log.Warnf("foo")
mustMatch("")
log.Infof("info %d", 2)
mustMatch("")
log.Errorf("error %q %d", "baz", 5)
mustMatch("")
}

View File

@@ -7,9 +7,9 @@ groups:
labels:
label: bar
annotations:
summary: "{{ }}"
summary: "{{ $value }"
description: "{{$labels}}"
- alert: UnknownAnnotationsFunction
- alert: UnkownAnnotationsFunction
for: 5m
expr: vm_rows > 0
labels:

View File

@@ -1,7 +1,7 @@
groups:
- name: group
rules:
- alert: UnknownLabelFunction
- alert: UnkownLabelFunction
for: 5m
expr: vm_rows > 0
labels:

View File

@@ -1,7 +0,0 @@
groups:
- name: group
headers:
- 'foobar'
rules:
- alert: rows
expr: vm_rows > 0

View File

@@ -1158,9 +1158,9 @@
$labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
/
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
> ( 25 / 100 )
for: 15m
labels:

View File

@@ -1,29 +0,0 @@
groups:
- name: groupTest
rules:
- alert: VMRows
for: 1ms
expr: vm_rows > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
invalid-field-1: invalid-value-1
invalid-field-2: invalid-value-2
---
groups:
- name: TestGroup
interval: 2s
concurrency: 2
type: graphite
rules:
- alert: Conns
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
for: 3m
annotations:
summary: Too high connection number for {{$labels.instance}}
description: "It is {{ $value }} connections for {{$labels.instance}}"
invalid-field-2: invalid-value-2
invalid-field-3: invalid-value-3

View File

@@ -1,11 +0,0 @@
groups:
- name: foo
rules:
- alert: VMRows
expr: vm_rows > 0
---
groups:
- name: foo
rules:
- alert: VMRows
expr: vm_rows > 0

View File

@@ -1,15 +0,0 @@
---
groups:
- name: groupTest
rules:
- alert: VMRows
for: 1ms
expr: vm_rows > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
---
groups:

View File

@@ -1,46 +0,0 @@
---
groups:
- name: groupTest
rules:
- alert: VMRows
for: 1ms
expr: vm_rows > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
- name: groupTest-2
rules:
- alert: VMRows-2
for: 1ms
expr: vm_rows_2 > 0
labels:
label: bar2
host: "{{ $labels.instance }}"
annotations:
summary: "\n markdown result is : \n---\n # header\n body: \n text \n----\n"
---
groups:
- name: groupTest-3
rules:
- alert: VMRows-3
for: 1ms
expr: vm_rows_3 > 0
labels:
label: bar_3
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
- name: groupTest-4
rules:
- alert: VMRows-4
for: 1ms
expr: vm_rows_4 > 0
labels:
label: bar4
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
---
groups:

View File

@@ -1,30 +1,22 @@
groups:
- name: TestGroup
interval: 5s
interval: 2s
concurrency: 2
limit: 1000
headers:
- "MyHeader: foo"
notifier_headers:
- "MyHeader: foo"
params:
denyPartialResponse: ["true"]
extra_label: ["env=dev"]
rules:
- alert: Conns
expr: vm_tcplistener_conns > 0
expr: sum(vm_tcplistener_conns) by(instance) > 1
for: 3m
debug: true
update_entries_limit: 40
annotations:
labels: "Available labels: {{ $labels }}"
summary: Too high connection number for {{ $labels.instance }}
summary: Too high connection number for {{$labels.instance}}
{{ with printf "sum(vm_tcplistener_conns{instance=%q})" .Labels.instance | query }}
{{ . | first | value }}
{{ end }}
description: "It is {{ $value }} connections for {{$labels.instance}}"
link: http://localhost:3000/d/wNf0q_kZk?viewPanel=51&from={{($activeAt.Add (parseDurationTime "1h")).UnixMilli}}&to={{($activeAt.Add (parseDurationTime "-1h")).UnixMilli}}
- alert: ExampleAlertAlwaysFiring
update_entries_limit: -1
expr: sum by(job)
(up == 1)
labels:

View File

@@ -7,7 +7,6 @@ groups:
- alert: Conns
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
for: 3m
annotations:
summary: Too high connection number for {{$labels.instance}}
description: "It is {{ $value }} connections for {{$labels.instance}}"
@@ -15,7 +14,6 @@ groups:
interval: 2s
concurrency: 2
type: prometheus
eval_delay: 30s
rules:
- alert: Conns
expr: sum(vm_tcplistener_conns) by (instance) > 1

View File

@@ -1,13 +0,0 @@
groups:
- name: groupTest
## default interval is 1min, eval_offset shouldn't be greater than interval
eval_offset: 2m
rules:
- alert: VMRows
for: 2s
expr: sum(rate(vm_http_request_errors_total[2s])) > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"

View File

@@ -1,10 +0,0 @@
groups:
- name: InvalidStatsLogsql
type: vlogs
interval: 5m
rules:
- record: MissingFilter
expr: 'stats count(*) as requests'
- record: MissingStatsPipe
expr: 'service: "nginx"'

View File

@@ -1,29 +0,0 @@
groups:
- name: RequestCount
type: vlogs
interval: 5m
rules:
- record: nginxRequestCount
expr: 'env: "test" AND service: "nginx" | stats count(*) as requests'
annotations:
description: "Service nginx on env test accepted {{$labels.requests}} requests in the last 5 minutes"
- record: prodRequestCount
expr: 'env: "prod" | stats by (service) count(*) as requests'
annotations:
description: "Service {{$labels.service}} on env prod accepted {{$labels.requests}} requests in the last 5 minutes"
- name: ServiceLog
type: vlogs
interval: 5m
rules:
- alert: HasErrorLog
expr: 'env: "prod" AND status:~"error|warn" | stats by (service) count(*) as errorLog | filter errorLog:>0'
annotations:
description: "Service {{$labels.service}} generated {{$labels.errorLog}} error logs in the last 5 minutes"
- name: ServiceRequest
type: vlogs
interval: 10m
rules:
- alert: TooManyFailedRequest
expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:!~200) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
annotations:
description: "Connection from address {{$labels.ip}} has {{$value}} failed requests ratio in last 10 minutes"

View File

@@ -1,138 +0,0 @@
package config
import (
"fmt"
"slices"
"strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
"github.com/VictoriaMetrics/metricsql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
)
// Type represents data source type
type Type struct {
Name string
}
// NewPrometheusType returns prometheus datasource type
func NewPrometheusType() Type {
return Type{
Name: "prometheus",
}
}
// NewGraphiteType returns graphite datasource type
func NewGraphiteType() Type {
return Type{
Name: "graphite",
}
}
// NewVLogsType returns victorialogs datasource type
func NewVLogsType() Type {
return Type{
Name: "vlogs",
}
}
// NewRawType returns datasource type from raw string
// without validation.
func NewRawType(d string) Type {
return Type{Name: d}
}
// Get returns datasource type
func (t *Type) Get() string {
return t.Name
}
// Set changes datasource type
func (t *Type) Set(d Type) {
t.Name = d.Name
}
// String implements String interface with default value.
func (t Type) String() string {
if t.Name == "" {
return "prometheus"
}
return t.Name
}
// ValidateExpr validates query expression with datasource ql.
func (t *Type) ValidateExpr(expr string) error {
switch t.String() {
case "graphite":
if _, err := graphiteql.Parse(expr); err != nil {
return fmt.Errorf("bad GraphiteQL expr: %q, err: %w", expr, err)
}
case "prometheus":
if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad MetricsQL expr: %q, err: %w", expr, err)
}
case "vlogs":
q, err := logstorage.ParseStatsQuery(expr, 0)
if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
labels, err := q.GetStatsLabels()
if err != nil {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
}
if slices.Contains(labels, "_time") {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)
}
return nil
}
// SupportedType is true if given datasource type is supported
func SupportedType(dsType string) bool {
return dsType == "graphite" || dsType == "prometheus" || dsType == "vlogs"
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (t *Type) UnmarshalYAML(unmarshal func(any) error) error {
var s string
if err := unmarshal(&s); err != nil {
return err
}
if !SupportedType(s) {
return fmt.Errorf("unknown datasource type=%q, want prometheus, graphite or vlogs", s)
}
t.Name = s
return nil
}
// MarshalYAML implements the yaml.Unmarshaler interface.
func (t Type) MarshalYAML() (any, error) {
return t.Name, nil
}
// Header is a Key - Value struct for holding an HTTP header.
type Header struct {
Key string
Value string
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (h *Header) UnmarshalYAML(unmarshal func(any) error) error {
var s string
if err := unmarshal(&s); err != nil {
return err
}
if s == "" {
return nil
}
n := strings.IndexByte(s, ':')
if n < 0 {
return fmt.Errorf(`missing ':' in header %q; expecting "key: value" format`, s)
}
h.Key = strings.TrimSpace(s[:n])
h.Value = strings.TrimSpace(s[n+1:])
return nil
}

View File

@@ -1,340 +0,0 @@
package datasource
import (
"context"
"errors"
"fmt"
"io"
"maps"
"net/http"
"net/url"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
type datasourceType string
const (
datasourcePrometheus datasourceType = "prometheus"
datasourceGraphite datasourceType = "graphite"
datasourceVLogs datasourceType = "vlogs"
)
func toDatasourceType(s string) datasourceType {
switch s {
case string(datasourcePrometheus):
return datasourcePrometheus
case string(datasourceGraphite):
return datasourceGraphite
case string(datasourceVLogs):
return datasourceVLogs
default:
logger.Panicf("BUG: unknown datasource type %q", s)
}
return ""
}
// Client is a datasource entity for reading data,
// supported clients are enumerated in datasourceType.
// WARN: when adding a new field, remember to check if Clone() method needs to be updated.
type Client struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
appendTypePrefix bool
queryStep time.Duration
dataSourceType datasourceType
// ApplyIntervalAsTimeFilter is only valid for vlogs datasource.
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression,
// and we will add evaluation interval as an additional timeFilter when querying.
applyIntervalAsTimeFilter bool
// evaluationInterval will help setting request's `step` param,
// or adding time filter for LogsQL expression.
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
debug bool
}
type keyValue struct {
key string
value string
}
// Clone clones shared http client and other configuration to the new client.
func (c *Client) Clone() *Client {
ns := &Client{
c: c.c,
authCfg: c.authCfg,
datasourceURL: c.datasourceURL,
appendTypePrefix: c.appendTypePrefix,
queryStep: c.queryStep,
dataSourceType: c.dataSourceType,
evaluationInterval: c.evaluationInterval,
// init map so it can be populated below
extraParams: url.Values{},
debug: c.debug,
}
if len(c.extraHeaders) > 0 {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
maps.Copy(ns.extraParams, c.extraParams)
return ns
}
// ApplyParams - changes given querier params.
func (c *Client) ApplyParams(params QuerierParams) *Client {
if params.DataSourceType != "" {
c.dataSourceType = toDatasourceType(params.DataSourceType)
}
c.evaluationInterval = params.EvaluationInterval
c.applyIntervalAsTimeFilter = params.ApplyIntervalAsTimeFilter
if params.QueryParams != nil {
if c.extraParams == nil {
c.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params are prior to default ones
if c.extraParams.Has(k) {
c.extraParams.Del(k)
}
for _, v := range vl {
// don't use .Set() instead of Del/Add since it is allowed
// for GET params to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
c.extraParams.Add(k, v)
}
}
}
if params.Headers != nil {
for key, value := range params.Headers {
kv := keyValue{key: key, value: value}
c.extraHeaders = append(c.extraHeaders, kv)
}
}
c.debug = params.Debug
return c
}
// BuildWithParams - implements interface.
func (c *Client) BuildWithParams(params QuerierParams) Querier {
return c.Clone().ApplyParams(params)
}
// NewPrometheusClient returns a new prometheus datasource client.
func NewPrometheusClient(baseURL string, authCfg *promauth.Config, appendTypePrefix bool, c *http.Client) *Client {
return &Client{
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
queryStep: *queryStep,
dataSourceType: datasourcePrometheus,
extraParams: url.Values{},
}
}
// Query executes the given query and returns parsed response
func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsInstantResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(resp)
if err != nil {
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
}
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if c.dataSourceType == datasourceGraphite {
return res, fmt.Errorf("%q is not supported for QueryRange", c.dataSourceType)
}
// TODO: disable range query LogsQL with time filter now
if c.dataSourceType == datasourceVLogs && !c.applyIntervalAsTimeFilter {
return res, fmt.Errorf("range query is not supported for LogsQL expression %q because it contains time filter. Remove time filter from the expression and try again", query)
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse
case datasourceVLogs:
parseFn = parseVLogsRangeResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(resp)
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return res, err
}
func (c *Client) do(req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if c.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
}
resp, err := c.c.Do(req)
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
}
return resp, nil
}
func (c *Client) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusRangeReqParams(req, query, start, end)
case datasourceVLogs:
c.setVLogsRangeReqParams(req, query, start, end)
default:
logger.Panicf("BUG: unsupported datasource type %q to create range query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
c.setGraphiteReqParams(req, query)
case datasourceVLogs:
c.setVLogsInstantReqParams(req, query, ts)
default:
logger.Panicf("BUG: unsupported datasource type %q to create query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newRequest(ctx context.Context) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.datasourceURL, err)
}
req.Header.Set("Content-Type", "application/json")
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
}
for _, h := range c.extraHeaders {
req.Header.Set(h.key, h.value)
}
return req, nil
}
// setReqParams adds query and other extra params for the request.
func (c *Client) setReqParams(r *http.Request, query string) {
q := r.URL.Query()
for k, vs := range c.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
q.Set("query", query)
r.URL.RawQuery = q.Encode()
}

View File

@@ -1,71 +0,0 @@
package datasource
import (
"encoding/json"
"fmt"
"net/http"
)
type graphiteResponse []graphiteResponseTarget
type graphiteResponseTarget struct {
Target string `json:"target"`
Tags map[string]string `json:"tags"`
DataPoints [][2]float64 `json:"datapoints"`
}
func (r graphiteResponse) metrics() []Metric {
var ms []Metric
for _, res := range r {
if len(res.DataPoints) < 1 {
continue
}
var m Metric
// add only last value to the result.
last := res.DataPoints[len(res.DataPoints)-1]
m.Values = append(m.Values, last[0])
m.Timestamps = append(m.Timestamps, int64(last[1]))
for k, v := range res.Tags {
m.AddLabel(k, v)
}
ms = append(ms, m)
}
return ms
}
func parseGraphiteResponse(resp *http.Response) (Result, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
}
return Result{Data: r.metrics()}, nil
}
const (
graphitePath = "/render"
graphitePrefix = "/graphite"
)
func (c *Client) setGraphiteReqParams(r *http.Request, query string) {
if c.appendTypePrefix {
r.URL.Path += graphitePrefix
}
r.URL.Path += graphitePath
q := r.URL.Query()
from := "-5min"
q.Set("from", from)
q.Set("format", "json")
q.Set("target", query)
q.Set("until", "now")
for k, vs := range c.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
r.URL.RawQuery = q.Encode()
}

Some files were not shown because too many files have changed in this diff Show More