mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-22 03:06:36 +03:00
Compare commits
19 Commits
query-debu
...
v1.87.2-cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
27afb0ac37 | ||
|
|
16578b58b6 | ||
|
|
5816678a86 | ||
|
|
c9547f4fba | ||
|
|
1026e25f33 | ||
|
|
95b66f3de5 | ||
|
|
3f64dae46b | ||
|
|
808a06bfd4 | ||
|
|
8dd8972a7a | ||
|
|
13d0a2a3e7 | ||
|
|
6ac2f62577 | ||
|
|
3dfb9e732c | ||
|
|
2961a90f7d | ||
|
|
6353aead68 | ||
|
|
cc2a82a7dc | ||
|
|
68e86a5d49 | ||
|
|
732dd5357a | ||
|
|
ddd8d01cf5 | ||
|
|
49f69a7068 |
14
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
14
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -5,10 +5,10 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/victoriametrics/troubleshooting/) first.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
|
||||
- type: textarea
|
||||
id: describe-the-bug
|
||||
attributes:
|
||||
@@ -60,12 +60,12 @@ body:
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring)
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
|
||||
32
.github/ISSUE_TEMPLATE/question.yml
vendored
32
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -1,32 +0,0 @@
|
||||
name: Question
|
||||
description: Ask a question regarding VictoriaMetrics or its components
|
||||
labels: [question]
|
||||
body:
|
||||
- type: textarea
|
||||
id: describe-the-component
|
||||
attributes:
|
||||
label: Is your question request related to a specific component?
|
||||
placeholder: |
|
||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: describe-the-question
|
||||
attributes:
|
||||
label: Describe the question in detail
|
||||
description: |
|
||||
A clear and concise description of the issue and the question.
|
||||
validations:
|
||||
required: true
|
||||
- type: checkboxes
|
||||
id: troubleshooting
|
||||
attributes:
|
||||
label: Troubleshooting docs
|
||||
description: I am familiar with the following troubleshooting docs
|
||||
options:
|
||||
- label: General - https://docs.victoriametrics.com/victoriametrics/troubleshooting/
|
||||
required: false
|
||||
- label: vmagent - https://docs.victoriametrics.com/victoriametrics/vmagent/#troubleshooting
|
||||
required: false
|
||||
- label: vmalert - https://docs.victoriametrics.com/victoriametrics/vmalert/#troubleshooting
|
||||
required: false
|
||||
23
.github/copilot-instructions.md
vendored
23
.github/copilot-instructions.md
vendored
@@ -1,23 +0,0 @@
|
||||
# Project Overview
|
||||
|
||||
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
|
||||
## Folder Structure
|
||||
|
||||
- `/app`: Contains the compilable binaries.
|
||||
- `/lib`: Contains the golang reusable libraries
|
||||
- `/docs/victoriametrics`: Contains documentation for the project.
|
||||
- `/apptest/tests`: Contains integration tests.
|
||||
|
||||
## Libraries and Frameworks
|
||||
|
||||
- Backend: Golang, no framework. Use third-party libraries sparingly.
|
||||
- Frontend: React.
|
||||
|
||||
## Code review guidelines
|
||||
|
||||
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
|
||||
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
|
||||
Chore-only changes may be omitted from the changelog.
|
||||
|
||||
|
||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -8,12 +8,10 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
open-pull-requests-limit: 0
|
||||
- package-ecosystem: "bundler"
|
||||
directory: "/docs"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
open-pull-requests-limit: 0
|
||||
interval: "daily"
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/app/vmui/packages/vmui/web"
|
||||
schedule:
|
||||
|
||||
10
.github/pull_request_template.md
vendored
10
.github/pull_request_template.md
vendored
@@ -1,10 +0,0 @@
|
||||
### Describe Your Changes
|
||||
|
||||
Please provide a brief description of the changes you made. Be as specific as possible to help others understand the purpose and impact of your modifications.
|
||||
|
||||
### Checklist
|
||||
|
||||
The following checks are **mandatory**:
|
||||
|
||||
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
|
||||
- [ ] My change adheres to [VictoriaMetrics development goals](https://docs.victoriametrics.com/victoriametrics/goals/).
|
||||
69
.github/workflows/build.yml
vendored
69
.github/workflows/build.yml
vendored
@@ -1,69 +0,0 @@
|
||||
name: build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
- '**/Dockerfile'
|
||||
- '**/Makefile'
|
||||
- '!app/vmui/**'
|
||||
- '.github/workflows/build.yml'
|
||||
pull_request:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
- '**/Dockerfile'
|
||||
- '**/Makefile'
|
||||
- '!app/vmui/**'
|
||||
- '.github/workflows/build.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
cancel-in-progress: true
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: ${{ matrix.os }}-${{ matrix.arch }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- os: linux
|
||||
arch: amd64
|
||||
- os: linux
|
||||
arch: arm64
|
||||
- os: linux
|
||||
arch: arm
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
- os: linux
|
||||
arch: 386
|
||||
- os: freebsd
|
||||
arch: amd64
|
||||
- os: openbsd
|
||||
arch: amd64
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
|
||||
- name: Build vmcluster for ${{ matrix.os }}-${{ matrix.arch }}
|
||||
run: make vmcluster-${{ matrix.os }}-${{ matrix.arch }}
|
||||
26
.github/workflows/check-licenses.yml
vendored
26
.github/workflows/check-licenses.yml
vendored
@@ -14,25 +14,13 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.21.0
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
|
||||
- name: Check License
|
||||
run: make check-licenses
|
||||
run: |
|
||||
make check-licenses
|
||||
|
||||
62
.github/workflows/codeql-analysis-go.yml
vendored
62
.github/workflows/codeql-analysis-go.yml
vendored
@@ -1,62 +0,0 @@
|
||||
name: 'CodeQL Go'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
pull_request:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
|
||||
concurrency:
|
||||
cancel-in-progress: true
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache: false
|
||||
go-version: stable
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/bin
|
||||
~/go/pkg/mod
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: go
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: 'language:go'
|
||||
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: "CodeQL - JS"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["javascript"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
with:
|
||||
category: "javascript"
|
||||
92
.github/workflows/codeql-analysis.yml
vendored
Normal file
92
.github/workflows/codeql-analysis.yml
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
# For most projects, this workflow file will not need changing; you simply need
|
||||
# to commit it to your repository.
|
||||
#
|
||||
# You may wish to alter this file to override the set of languages analyzed,
|
||||
# or to provide custom queries or build logic.
|
||||
#
|
||||
# ******** NOTE ********
|
||||
# We have attempted to detect the languages in your repository. Please check
|
||||
# the `language` matrix defined below to confirm you have the correct set of
|
||||
# supported CodeQL languages.
|
||||
#
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["go"]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
57
.github/workflows/docs.yaml
vendored
57
.github/workflows/docs.yaml
vendored
@@ -1,57 +0,0 @@
|
||||
name: publish-docs
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'master'
|
||||
paths:
|
||||
- 'docs/**'
|
||||
- '.github/workflows/docs.yaml'
|
||||
workflow_dispatch: {}
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout and to commit back image update
|
||||
deployments: write
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
path: __vm
|
||||
|
||||
- name: Checkout private code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
repository: VictoriaMetrics/vmdocs
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
path: __vm-docs
|
||||
|
||||
- name: Import GPG key
|
||||
uses: crazy-max/ghaction-import-gpg@v6
|
||||
id: import-gpg
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
||||
passphrase: ${{ secrets.VM_BOT_PASSPHRASE }}
|
||||
git_user_signingkey: true
|
||||
git_commit_gpgsign: true
|
||||
git_config_global: true
|
||||
|
||||
- name: Copy docs
|
||||
id: update
|
||||
run: |
|
||||
find docs -type d -maxdepth 1 -mindepth 1 -exec \
|
||||
sh -c 'rsync -zarvh --delete {}/ ../__vm-docs/content/$(basename {})/' \;
|
||||
echo "SHORT_SHA=$(git rev-parse --short $GITHUB_SHA)" >> $GITHUB_OUTPUT
|
||||
working-directory: __vm
|
||||
|
||||
- name: Push to vmdocs
|
||||
run: |
|
||||
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
|
||||
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
|
||||
if [[ -n $(git status --porcelain) ]]; then
|
||||
git add .
|
||||
git commit -S -m "sync docs with VictoriaMetrics/VictoriaMetrics commit: ${{ steps.update.outputs.SHORT_SHA }}"
|
||||
git push
|
||||
fi
|
||||
working-directory: __vm-docs
|
||||
95
.github/workflows/main.yml
vendored
Normal file
95
.github/workflows/main.yml
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
name: main
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golangci-lint
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
matrix:
|
||||
scenario: ["test-full", "test-pure", "test-full-386"]
|
||||
name: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make vmcluster-crossbuild
|
||||
113
.github/workflows/test.yml
vendored
113
.github/workflows/test.yml
vendored
@@ -1,113 +0,0 @@
|
||||
name: test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
- 'go.*'
|
||||
- '.github/workflows/main.yml'
|
||||
pull_request:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- '**.go'
|
||||
- 'go.*'
|
||||
- '.github/workflows/main.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
cancel-in-progress: true
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
|
||||
|
||||
- name: Cache golangci-lint
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/golangci-lint
|
||||
~/go/bin
|
||||
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
|
||||
|
||||
- name: Run check-all
|
||||
run: |
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
unit:
|
||||
name: unit
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
scenario:
|
||||
- 'test-full'
|
||||
- 'test-full-386'
|
||||
- 'test-pure'
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
|
||||
- name: Run tests
|
||||
run: GOGC=10 make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
files: ./coverage.txt
|
||||
|
||||
integration:
|
||||
name: integration
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
|
||||
- name: Run integration tests
|
||||
run: make integration-test
|
||||
82
.github/workflows/vmui.yml
vendored
82
.github/workflows/vmui.yml
vendored
@@ -1,82 +0,0 @@
|
||||
name: vmui
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- 'app/vmui/packages/vmui/**'
|
||||
- '.github/workflows/vmui.yml'
|
||||
pull_request:
|
||||
branches:
|
||||
- cluster
|
||||
- master
|
||||
paths:
|
||||
- 'app/vmui/packages/vmui/**'
|
||||
- '.github/workflows/vmui.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: read
|
||||
pull-requests: read
|
||||
checks: write
|
||||
|
||||
concurrency:
|
||||
cancel-in-progress: true
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
jobs:
|
||||
vmui-checks:
|
||||
name: VMUI Checks (lint, test, typecheck)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '24.x'
|
||||
|
||||
- name: Cache node-modules
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
app/vmui/packages/vmui/node_modules
|
||||
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
||||
restore-keys: vmui-artifacts-${{ runner.os }}-
|
||||
|
||||
- name: Run lint
|
||||
id: lint
|
||||
run: make vmui-lint
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests
|
||||
id: test
|
||||
run: make vmui-test
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run typecheck
|
||||
id: typecheck
|
||||
run: make vmui-typecheck
|
||||
continue-on-error: true
|
||||
|
||||
- name: Annotate Code Linting Results
|
||||
uses: ataylorme/eslint-annotate-action@v3
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
report-json: app/vmui/packages/vmui/vmui-lint-report.json
|
||||
|
||||
- name: Check overall status
|
||||
run: |
|
||||
echo "Lint status: ${{ steps.lint.outcome }}"
|
||||
echo "Test status: ${{ steps.test.outcome }}"
|
||||
echo "Typecheck status: ${{ steps.typecheck.outcome }}"
|
||||
|
||||
if [[ "${{ steps.lint.outcome }}" == "failure" || "${{ steps.test.outcome }}" == "failure" || "${{ steps.typecheck.outcome }}" == "failure" ]]; then
|
||||
echo "One or more checks failed"
|
||||
exit 1
|
||||
else
|
||||
echo "All checks passed"
|
||||
fi
|
||||
10
.gitignore
vendored
10
.gitignore
vendored
@@ -7,12 +7,9 @@
|
||||
.vscode
|
||||
*.test
|
||||
*.swp
|
||||
/vmdocs
|
||||
/gocache-for-docker
|
||||
/victoria-logs-data
|
||||
/victoria-metrics-data
|
||||
/vmagent-remotewrite-data
|
||||
/vlagent-remotewritewrite
|
||||
/vmstorage-data
|
||||
/vmselect-cache
|
||||
/package/temp-deb-*
|
||||
@@ -23,9 +20,4 @@
|
||||
Gemfile.lock
|
||||
/_site
|
||||
_site
|
||||
*.tmp
|
||||
/docs/.jekyll-metadata
|
||||
coverage.txt
|
||||
cspell.json
|
||||
*~
|
||||
deployment/docker/provisioning/plugins/
|
||||
*.tmp
|
||||
@@ -1,29 +1,15 @@
|
||||
version: "2"
|
||||
linters:
|
||||
settings:
|
||||
errcheck:
|
||||
exclude-functions:
|
||||
- fmt.Fprintf
|
||||
- fmt.Fprint
|
||||
- (net/http.ResponseWriter).Write
|
||||
exclusions:
|
||||
generated: lax
|
||||
presets:
|
||||
- common-false-positives
|
||||
- legacy
|
||||
- std-error-handling
|
||||
rules:
|
||||
- linters:
|
||||
- staticcheck
|
||||
text: 'SA(4003|1019|5011):'
|
||||
paths:
|
||||
- third_party$
|
||||
- builtin$
|
||||
- examples$
|
||||
formatters:
|
||||
exclusions:
|
||||
generated: lax
|
||||
paths:
|
||||
- third_party$
|
||||
- builtin$
|
||||
- examples$
|
||||
run:
|
||||
timeout: 2m
|
||||
|
||||
enable:
|
||||
- revive
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- linters:
|
||||
- staticcheck
|
||||
text: "SA(4003|1019|5011):"
|
||||
|
||||
linters-settings:
|
||||
errcheck:
|
||||
exclude: ./errcheck_excludes.txt
|
||||
|
||||
@@ -4,4 +4,3 @@ allowlist:
|
||||
- BSD-3-Clause
|
||||
- BSD-2-Clause
|
||||
- ISC
|
||||
- MPL-2.0
|
||||
|
||||
120
CODE_OF_CONDUCT_RU.md
Normal file
120
CODE_OF_CONDUCT_RU.md
Normal file
@@ -0,0 +1,120 @@
|
||||
|
||||
# Кодекс Поведения участника
|
||||
|
||||
## Наши обязательства
|
||||
|
||||
Мы, как участники, авторы и лидеры обязуемся сделать участие в сообществе
|
||||
свободным от притеснений для всех, независимо от возраста, телосложения,
|
||||
видимых или невидимых ограничений способности, этнической принадлежности,
|
||||
половых признаков, гендерной идентичности и выражения, уровня опыта,
|
||||
образования, социо-экономического статуса, национальности, внешности,
|
||||
расы, религии, или сексуальной идентичности и ориентации.
|
||||
|
||||
Мы обещаем действовать и взаимодействовать таким образом, чтобы вносить вклад в открытое,
|
||||
дружелюбное, многообразное, инклюзивное и здоровое сообщество.
|
||||
|
||||
## Наши стандарты
|
||||
|
||||
Примеры поведения, создающие условия для благоприятных взаимоотношений включают в себя:
|
||||
|
||||
* Проявление доброты и эмпатии к другим участникам проекта
|
||||
* Уважение к чужой точке зрения и опыту
|
||||
* Конструктивная критика и принятие конструктивной критики
|
||||
* Принятие ответственности, принесение извинений тем, кто пострадал от наших ошибок
|
||||
и извлечение уроков из опыта
|
||||
* Ориентирование на то, что лучше подходит для сообщества, а не только для нас лично
|
||||
|
||||
Примеры неприемлемого поведения участников включают в себя:
|
||||
|
||||
* Использование выражений или изображений сексуального характера и нежелательное сексуальное внимание или домогательство в любой форме
|
||||
* Троллинг, оскорбительные или уничижительные комментарии, переход на личности или затрагивание политических убеждений
|
||||
* Публичное или приватное домогательство
|
||||
* Публикация личной информации других лиц, например, физического или электронного адреса, без явного разрешения
|
||||
* Иное поведение, которое обоснованно считать неуместным в профессиональной обстановке
|
||||
|
||||
## Обязанности
|
||||
|
||||
Лидеры сообщества отвечают за разъяснение и применение наших стандартов приемлемого
|
||||
поведения и будут предпринимать соответствующие и честные меры по исправлению положения
|
||||
в ответ на любое поведение, которое они сочтут неприемлемым, угрожающим, оскорбительным или вредным.
|
||||
|
||||
Лидеры сообщества обладают правом и обязанностью удалять, редактировать или отклонять
|
||||
комментарии, коммиты, код, изменения в вики, вопросы и другой вклад, который не совпадает
|
||||
с Кодексом Поведения, и предоставят причины принятого решения, когда сочтут нужным.
|
||||
|
||||
## Область применения
|
||||
|
||||
Данный Кодекс Поведения применим во всех во всех публичных физических и цифровых пространства сообщества,
|
||||
а также когда человек официально представляет сообщество в публичных местах.
|
||||
Примеры представления проекта или сообщества включают использование официальной электронной почты,
|
||||
публикации в официальном аккаунте в социальных сетях,
|
||||
или упоминания как представителя в онлайн или оффлайн мероприятии.
|
||||
|
||||
## Приведение в исполнение
|
||||
|
||||
О случаях домогательства, а так же оскорбительного или иного другого неприемлемого
|
||||
поведения можно сообщить ответственным лидерам сообщества с помощью письма на info@victoriametrics.com
|
||||
Все жалобы будут рассмотрены и расследованы оперативно и беспристрастно.
|
||||
|
||||
Все лидеры сообщества обязаны уважать неприкосновенность частной жизни и личную
|
||||
неприкосновенность автора сообщения.
|
||||
|
||||
## Руководство по исполнению
|
||||
|
||||
Лидеры сообщества будут следовать следующим Принципам Воздействия в Сообществе,
|
||||
чтобы определить последствия для тех, кого они считают виновными в нарушении данного Кодекса Поведения:
|
||||
|
||||
### 1. Исправление
|
||||
|
||||
**Общественное влияние**: Использование недопустимой лексики или другое поведение,
|
||||
считающиеся непрофессиональным или нежелательным в сообществе.
|
||||
|
||||
**Последствия**: Личное, письменное предупреждение от лидеров сообщества,
|
||||
объясняющее суть нарушения и почему такое поведение
|
||||
было неуместно. Лидеры сообщества могут попросить принести публичное извинение.
|
||||
|
||||
### 2. Предупреждение
|
||||
|
||||
**Общественное влияние**: Нарушение в результате одного инцидента или серии действий.
|
||||
|
||||
**Последствия**: Предупреждение о последствиях в случае продолжающегося неуместного поведения.
|
||||
На определенное время не допускается взаимодействие с людьми, вовлеченными в инцидент,
|
||||
включая незапрошенное взаимодействие
|
||||
с теми, кто обеспечивает соблюдение Кодекса. Это включает в себя избегание взаимодействия
|
||||
в публичных пространствах, а так же во внешних каналах,
|
||||
таких как социальные сети. Нарушение этих правил влечет за собой временный или вечный бан.
|
||||
|
||||
### 3. Временный бан
|
||||
|
||||
**Общественное влияние**: Серьёзное нарушение стандартов сообщества,
|
||||
включая продолжительное неуместное поведение.
|
||||
|
||||
**Последствия**: Временный запрет (бан) на любое взаимодействие
|
||||
или публичное общение с сообществом на определенный период времени.
|
||||
На этот период не допускается публичное или личное взаимодействие с людьми,
|
||||
вовлеченными в инцидент, включая незапрошенное взаимодействие
|
||||
с теми, кто обеспечивает соблюдение Кодекса.
|
||||
Нарушение этих правил влечет за собой вечный бан.
|
||||
|
||||
### 4. Вечный бан
|
||||
|
||||
**Общественное влияние**: Демонстрация систематических нарушений стандартов сообщества,
|
||||
включая продолжающееся неуместное поведение, домогательство до отдельных лиц,
|
||||
или проявление агрессии либо пренебрежительного отношения к категориям лиц.
|
||||
|
||||
**Последствия**: Вечный запрет на любое публичное взаимодействие с сообществом.
|
||||
|
||||
## Атрибуция
|
||||
|
||||
Данный Кодекс Поведения основан на [Кодекс Поведения участника][homepage],
|
||||
версии 2.0, доступной по адресу
|
||||
<https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.
|
||||
|
||||
Принципы Воздействия в Сообществе были вдохновлены [Mozilla's code of conduct
|
||||
enforcement ladder](https://github.com/mozilla/diversity).
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
Ответы на общие вопросы о данном кодексе поведения ищите на странице FAQ:
|
||||
<https://www.contributor-covenant.org/faq>. Переводы доступны по адресу
|
||||
<https://www.contributor-covenant.org/translations>.
|
||||
@@ -1 +1,16 @@
|
||||
The document has been moved [here](https://docs.victoriametrics.com/victoriametrics/contributing/).
|
||||
If you like VictoriaMetrics and want to contribute, then we need the following:
|
||||
|
||||
- Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
|
||||
- Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
|
||||
- Updating documentation.
|
||||
|
||||
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
|
||||
|
||||
- Prefer simple code and architecture.
|
||||
- Avoid complex abstractions.
|
||||
- Avoid magic code and fancy algorithms.
|
||||
- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
|
||||
- Minimize the number of moving parts in the distributed system.
|
||||
- Avoid automated decisions, which may hurt cluster availability, consistency or performance.
|
||||
|
||||
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2025 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
214
Makefile
214
Makefile
@@ -1,33 +1,22 @@
|
||||
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
MAKE_CONCURRENCY ?= $(shell getconf _NPROCESSORS_ONLN)
|
||||
MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
LATEST_TAG ?= cluster-latest
|
||||
|
||||
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
|
||||
ifeq ($(PKG_TAG),)
|
||||
PKG_TAG := $(BUILDINFO_TAG)
|
||||
endif
|
||||
|
||||
EXTRA_DOCKER_TAG_SUFFIX ?=
|
||||
EXTRA_GO_BUILD_TAGS ?=
|
||||
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
GOLANGCI_LINT_VERSION := 2.4.0
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
include app/*/Makefile
|
||||
include codespell/Makefile
|
||||
include docs/Makefile
|
||||
include deployment/*/Makefile
|
||||
include dashboards/Makefile
|
||||
include package/release/Makefile
|
||||
include benchmarks/Makefile
|
||||
|
||||
all: \
|
||||
vminsert \
|
||||
@@ -77,35 +66,16 @@ vmcluster-openbsd-amd64: \
|
||||
vmselect-openbsd-amd64 \
|
||||
vmstorage-openbsd-amd64
|
||||
|
||||
vmcluster-windows-amd64: \
|
||||
vminsert-windows-amd64 \
|
||||
vmselect-windows-amd64 \
|
||||
vmstorage-windows-amd64
|
||||
vmcluster-crossbuild: \
|
||||
vmcluster-linux-amd64 \
|
||||
vmcluster-linux-arm64 \
|
||||
vmcluster-linux-arm \
|
||||
vmcluster-linux-ppc64le \
|
||||
vmcluster-linux-386 \
|
||||
vmcluster-freebsd-amd64 \
|
||||
vmcluster-openbsd-amd64
|
||||
|
||||
vmcluster-darwin-amd64: \
|
||||
vminsert-darwin-amd64 \
|
||||
vmselect-darwin-amd64 \
|
||||
vmstorage-darwin-amd64
|
||||
|
||||
vmcluster-darwin-arm64: \
|
||||
vminsert-darwin-arm64 \
|
||||
vmselect-darwin-arm64 \
|
||||
vmstorage-darwin-arm64
|
||||
|
||||
# When adding a new crossbuild target, please also add it to the .github/workflows/build.yml
|
||||
crossbuild: vmcluster-crossbuild
|
||||
|
||||
# When adding a new crossbuild target, please also add it to the .github/workflows/build.yml
|
||||
vmcluster-crossbuild:
|
||||
$(MAKE_PARALLEL) vmcluster-linux-amd64 \
|
||||
vmcluster-linux-arm64 \
|
||||
vmcluster-linux-arm \
|
||||
vmcluster-linux-ppc64le \
|
||||
vmcluster-linux-386 \
|
||||
vmcluster-freebsd-amd64 \
|
||||
vmcluster-openbsd-amd64
|
||||
|
||||
publish: \
|
||||
publish: docker-scan \
|
||||
publish-vminsert \
|
||||
publish-vmselect \
|
||||
publish-vmstorage
|
||||
@@ -115,64 +85,20 @@ package: \
|
||||
package-vmselect \
|
||||
package-vmstorage
|
||||
|
||||
publish-final-images:
|
||||
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-from-rc && \
|
||||
PKG_TAG=$(TAG) $(MAKE) publish-latest
|
||||
|
||||
publish-latest:
|
||||
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-latest
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-latest
|
||||
|
||||
publish-release:
|
||||
rm -rf bin/*
|
||||
git checkout $(TAG) && $(MAKE) release && $(MAKE) publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release && $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release && $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release && $(MAKE) publish
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
|
||||
release:
|
||||
$(MAKE_PARALLEL) release-vmcluster
|
||||
release: \
|
||||
release-vmcluster
|
||||
|
||||
release-vmcluster: \
|
||||
release-vmcluster-linux-amd64 \
|
||||
release-vmcluster-linux-arm64 \
|
||||
release-vmcluster-freebsd-amd64 \
|
||||
release-vmcluster-openbsd-amd64 \
|
||||
release-vmcluster-windows-amd64 \
|
||||
release-vmcluster-darwin-amd64 \
|
||||
release-vmcluster-darwin-arm64
|
||||
release-vmcluster-openbsd-amd64
|
||||
|
||||
release-vmcluster-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
|
||||
@@ -186,21 +112,12 @@ release-vmcluster-freebsd-amd64:
|
||||
release-vmcluster-openbsd-amd64:
|
||||
GOOS=openbsd GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
|
||||
|
||||
release-vmcluster-windows-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-vmcluster-windows-goarch
|
||||
|
||||
release-vmcluster-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmcluster-goos-goarch
|
||||
|
||||
release-vmcluster-darwin-arm64:
|
||||
GOOS=darwin GOARCH=arm64 $(MAKE) release-vmcluster-goos-goarch
|
||||
|
||||
release-vmcluster-goos-goarch: \
|
||||
vminsert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmselect-$(GOOS)-$(GOARCH)-prod \
|
||||
vmstorage-$(GOOS)-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar $(TAR_OWNERSHIP) --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vminsert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmselect-$(GOOS)-$(GOARCH)-prod \
|
||||
vmstorage-$(GOOS)-$(GOARCH)-prod \
|
||||
@@ -214,89 +131,59 @@ release-vmcluster-goos-goarch: \
|
||||
vmselect-$(GOOS)-$(GOARCH)-prod \
|
||||
vmstorage-$(GOOS)-$(GOARCH)-prod
|
||||
|
||||
release-vmcluster-windows-goarch: \
|
||||
vminsert-windows-$(GOARCH)-prod \
|
||||
vmselect-windows-$(GOARCH)-prod \
|
||||
vmstorage-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vminsert-windows-$(GOARCH)-prod.exe \
|
||||
vmselect-windows-$(GOARCH)-prod.exe \
|
||||
vmstorage-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vminsert-windows-$(GOARCH)-prod.exe \
|
||||
vmselect-windows-$(GOARCH)-prod.exe \
|
||||
vmstorage-windows-$(GOARCH)-prod.exe \
|
||||
> victoria-metrics-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vminsert-windows-$(GOARCH)-prod.exe \
|
||||
vmselect-windows-$(GOARCH)-prod.exe \
|
||||
vmstorage-windows-$(GOARCH)-prod.exe
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
|
||||
fmt:
|
||||
gofmt -l -w -s ./lib
|
||||
gofmt -l -w -s ./app
|
||||
gofmt -l -w -s ./apptest
|
||||
|
||||
vet:
|
||||
GOEXPERIMENT=synctest go vet ./lib/...
|
||||
go vet ./lib/...
|
||||
go vet ./app/...
|
||||
go vet ./apptest/...
|
||||
|
||||
check-all: fmt vet golangci-lint govulncheck
|
||||
|
||||
clean-checkers: remove-golangci-lint remove-govulncheck
|
||||
|
||||
test:
|
||||
GOEXPERIMENT=synctest go test ./lib/... ./app/...
|
||||
go test ./lib/... ./app/...
|
||||
|
||||
test-race:
|
||||
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
|
||||
go test -race ./lib/... ./app/...
|
||||
|
||||
test-pure:
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||
CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||
|
||||
test-full:
|
||||
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
test-full-386:
|
||||
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test:
|
||||
$(MAKE) apptest
|
||||
|
||||
apptest:
|
||||
$(MAKE) all vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^TestSingle.*"
|
||||
GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
benchmark:
|
||||
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||
go test -bench=. ./lib/...
|
||||
go test -bench=. ./app/...
|
||||
|
||||
benchmark-pure:
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -bench=. ./app/...
|
||||
|
||||
vendor-update:
|
||||
go get -u ./lib/...
|
||||
go get -u ./app/...
|
||||
go mod tidy -compat=1.24
|
||||
go get -u -d ./lib/...
|
||||
go get -u -d ./app/...
|
||||
go mod tidy -compat=1.19
|
||||
go mod vendor
|
||||
|
||||
app-local:
|
||||
CGO_ENABLED=1 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
CGO_ENABLED=1 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-pure:
|
||||
CGO_ENABLED=0 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
CGO_ENABLED=0 go build $(RACE) -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-goos-goarch:
|
||||
CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-$(GOOS)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
CGO_ENABLED=$(CGO_ENABLED) GOOS=$(GOOS) GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-$(GOOS)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-windows-goarch:
|
||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc
|
||||
@@ -306,13 +193,10 @@ install-qtc:
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
GOEXPERIMENT=synctest golangci-lint run
|
||||
golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
||||
|
||||
remove-golangci-lint:
|
||||
rm -rf `which golangci-lint`
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.51.1
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
@@ -320,11 +204,33 @@ govulncheck: install-govulncheck
|
||||
install-govulncheck:
|
||||
which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest
|
||||
|
||||
remove-govulncheck:
|
||||
rm -rf `which govulncheck`
|
||||
|
||||
install-wwhrd:
|
||||
which wwhrd || go install github.com/frapposelli/wwhrd@latest
|
||||
|
||||
check-licenses: install-wwhrd
|
||||
wwhrd check -f .wwhrd.yml
|
||||
|
||||
copy-docs:
|
||||
echo '' > ${DST}
|
||||
@if [ ${ORDER} -ne 0 ]; then \
|
||||
echo "---\nsort: ${ORDER}\n---\n" > ${DST}; \
|
||||
fi
|
||||
cat ${SRC} >> ${DST}
|
||||
sed -i='.tmp' 's/<img src=\"docs\//<img src=\"/' ${DST}
|
||||
rm -rf docs/*.tmp
|
||||
|
||||
# Copies docs for all components and adds the order tag.
|
||||
# For ORDER=0 it adds no order tag.
|
||||
# Images starting with <img src="docs/ are replaced with <img src="
|
||||
# Cluster docs are supposed to be ordered as 9th.
|
||||
# The rest of docs is ordered manually.
|
||||
docs-sync:
|
||||
SRC=README.md DST=docs/Cluster-VictoriaMetrics.md ORDER=2 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md ORDER=3 $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md ORDER=4 $(MAKE) copy-docs
|
||||
SRC=app/vmauth/README.md DST=docs/vmauth.md ORDER=5 $(MAKE) copy-docs
|
||||
SRC=app/vmbackup/README.md DST=docs/vmbackup.md ORDER=6 $(MAKE) copy-docs
|
||||
SRC=app/vmrestore/README.md DST=docs/vmrestore.md ORDER=7 $(MAKE) copy-docs
|
||||
SRC=app/vmctl/README.md DST=docs/vmctl.md ORDER=8 $(MAKE) copy-docs
|
||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md ORDER=9 $(MAKE) copy-docs
|
||||
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md ORDER=10 $(MAKE) copy-docs
|
||||
|
||||
14
SECURITY.md
14
SECURITY.md
@@ -2,17 +2,13 @@
|
||||
|
||||
## Supported Versions
|
||||
|
||||
The following versions of VictoriaMetrics receive regular security fixes:
|
||||
|
||||
| Version | Supported |
|
||||
|---------|--------------------|
|
||||
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| other releases | :x: |
|
||||
|
||||
See [this page](https://victoriametrics.com/security/) for more details.
|
||||
| 1.81.x | :white_check_mark: |
|
||||
| 1.80.x | :x: |
|
||||
| 1.79.x | :white_check_mark: |
|
||||
| < 1.78 | :x: |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Please report any security issues to <security@victoriametrics.com>
|
||||
Please report any security issues to security@victoriametrics.com
|
||||
|
||||
BIN
VM_logo.zip
BIN
VM_logo.zip
Binary file not shown.
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -1 +0,0 @@
|
||||
VictoriaLogs source code has been moved to [github.com/VictoriaMetrics/VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/).
|
||||
@@ -85,12 +85,6 @@ vmagent-linux-arm64:
|
||||
vmagent-linux-ppc64le:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmagent-linux-s390x:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmagent-linux-loong64:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmagent-linux-386:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,27 +3,31 @@ package common
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
// PushCtx is a context used for populating WriteRequest.
|
||||
type PushCtx struct {
|
||||
// WriteRequest contains the WriteRequest, which must be pushed later to remote storage.
|
||||
//
|
||||
// The actual labels and samples for the time series are stored in Labels and Samples fields.
|
||||
WriteRequest prompb.WriteRequest
|
||||
WriteRequest prompbmarshal.WriteRequest
|
||||
|
||||
// Labels contains flat list of all the labels used in WriteRequest.
|
||||
Labels []prompb.Label
|
||||
Labels []prompbmarshal.Label
|
||||
|
||||
// Samples contains flat list of all the samples used in WriteRequest.
|
||||
Samples []prompb.Sample
|
||||
Samples []prompbmarshal.Sample
|
||||
}
|
||||
|
||||
// Reset resets ctx.
|
||||
func (ctx *PushCtx) Reset() {
|
||||
ctx.WriteRequest.Reset()
|
||||
tss := ctx.WriteRequest.Timeseries
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
ts.Labels = nil
|
||||
ts.Samples = nil
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = ctx.WriteRequest.Timeseries[:0]
|
||||
|
||||
promrelabel.CleanLabels(ctx.Labels)
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
@@ -35,10 +39,15 @@ func (ctx *PushCtx) Reset() {
|
||||
//
|
||||
// Call PutPushCtx when the ctx is no longer needed.
|
||||
func GetPushCtx() *PushCtx {
|
||||
if v := pushCtxPool.Get(); v != nil {
|
||||
return v.(*PushCtx)
|
||||
select {
|
||||
case ctx := <-pushCtxPoolCh:
|
||||
return ctx
|
||||
default:
|
||||
if v := pushCtxPool.Get(); v != nil {
|
||||
return v.(*PushCtx)
|
||||
}
|
||||
return &PushCtx{}
|
||||
}
|
||||
return &PushCtx{}
|
||||
}
|
||||
|
||||
// PutPushCtx returns ctx to the pool.
|
||||
@@ -46,7 +55,12 @@ func GetPushCtx() *PushCtx {
|
||||
// ctx mustn't be used after returning to the pool.
|
||||
func PutPushCtx(ctx *PushCtx) {
|
||||
ctx.Reset()
|
||||
pushCtxPool.Put(ctx)
|
||||
select {
|
||||
case pushCtxPoolCh <- ctx:
|
||||
default:
|
||||
pushCtxPool.Put(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
var pushCtxPool sync.Pool
|
||||
var pushCtxPoolCh = make(chan *PushCtx, cgroup.AvailableCPUs())
|
||||
|
||||
@@ -6,10 +6,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -22,16 +21,16 @@ var (
|
||||
|
||||
// InsertHandler processes csv data from req.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return stream.Parse(req, func(rows []csvimport.Row) error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -41,23 +40,23 @@ func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
@@ -65,9 +64,7 @@ func insertRows(at *auth.Token, rows []csvimport.Row, extraLabels []prompb.Label
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
97
app/vmagent/datadog/request_handler.go
Normal file
97
app/vmagent/datadog/request_handler.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package datadog
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
if ss.Host != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "host",
|
||||
Value: ss.Host,
|
||||
})
|
||||
}
|
||||
if ss.Device != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "device",
|
||||
Value: ss.Device,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := parser.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Timestamp: pt.Timestamp(),
|
||||
Value: pt.Value(),
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
package datadogsketches
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogsketches"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogsketches/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogsketches"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogsketches"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogsketches"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/beta/sketches request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(sketches []*datadogsketches.Sketch) error {
|
||||
return insertRows(at, sketches, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for _, sketch := range sketches {
|
||||
ms := sketch.ToSummary()
|
||||
for _, m := range ms {
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: m.Name,
|
||||
})
|
||||
for _, label := range m.Labels {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
})
|
||||
}
|
||||
for _, tag := range sketch.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, p := range m.Points {
|
||||
samples = append(samples, prompb.Sample{
|
||||
Timestamp: p.Timestamp,
|
||||
Value: p.Value,
|
||||
})
|
||||
}
|
||||
rowsTotal += len(m.Points)
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package datadogv1
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv1"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv1"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv1"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []datadogv1.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
if ss.Host != "" {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "host",
|
||||
Value: ss.Host,
|
||||
})
|
||||
}
|
||||
if ss.Device != "" {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "device",
|
||||
Value: ss.Device,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompb.Sample{
|
||||
Timestamp: pt.Timestamp(),
|
||||
Value: pt.Value(),
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
package datadogv2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv2"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv2"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv2"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v2/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ct := req.Header.Get("Content-Type")
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, ct, func(series []datadogv2.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv2.Series, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
for _, rs := range ss.Resources {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: rs.Type,
|
||||
Value: rs.Name,
|
||||
})
|
||||
}
|
||||
if ss.SourceTypeName != "" {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "source_type_name",
|
||||
Value: ss.SourceTypeName,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompb.Sample{
|
||||
Timestamp: pt.Timestamp * 1000,
|
||||
Value: pt.Value,
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -1,8 +1,8 @@
|
||||
ARG base_image=non-existing
|
||||
ARG base_image
|
||||
FROM $base_image
|
||||
|
||||
EXPOSE 8429
|
||||
|
||||
ENTRYPOINT ["/vmagent-prod"]
|
||||
ARG src_binary=non-existing
|
||||
ARG src_binary
|
||||
COPY $src_binary ./vmagent-prod
|
||||
|
||||
@@ -5,10 +5,8 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -21,12 +19,10 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, "", func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows)
|
||||
})
|
||||
return parser.ParseStream(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -36,22 +32,22 @@ func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
@@ -59,9 +55,7 @@ func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -10,18 +10,18 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field")
|
||||
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
|
||||
dbLabel = flag.String("influxDBLabel", "db", "Default label for the DB name sent over '?db={db_name}' query parameter")
|
||||
)
|
||||
@@ -35,9 +35,9 @@ var (
|
||||
// InsertHandlerForReader processes remote write for influx line protocol.
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
||||
return stream.Parse(r, encoding, true, "", "", func(db string, rows []influx.Row) error {
|
||||
return insertRows(at, db, rows, nil)
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -45,22 +45,21 @@ func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error
|
||||
//
|
||||
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
isStreamMode := req.Header.Get("Stream-Mode") == "1"
|
||||
return stream.Parse(req.Body, encoding, isStreamMode, precision, db, func(db string, rows []influx.Row) error {
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := getPushCtx()
|
||||
defer putPushCtx(ctx)
|
||||
|
||||
@@ -80,13 +79,13 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
|
||||
if tag.Key == *dbLabel {
|
||||
hasDBKey = true
|
||||
}
|
||||
commonLabels = append(commonLabels, prompb.Label{
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
if len(db) > 0 && !hasDBKey {
|
||||
commonLabels = append(commonLabels, prompb.Label{
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: *dbLabel,
|
||||
Value: db,
|
||||
})
|
||||
@@ -110,16 +109,16 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
|
||||
}
|
||||
metricGroup := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: metricGroup,
|
||||
})
|
||||
labels = append(labels, commonLabels...)
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Timestamp: r.Timestamp,
|
||||
Value: f.Value,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
@@ -130,9 +129,7 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
|
||||
ctx.ctx.Labels = labels
|
||||
ctx.ctx.Samples = samples
|
||||
ctx.commonLabels = commonLabels
|
||||
if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
@@ -144,7 +141,7 @@ func insertRows(at *auth.Token, db string, rows []influx.Row, extraLabels []prom
|
||||
|
||||
type pushCtx struct {
|
||||
ctx common.PushCtx
|
||||
commonLabels []prompb.Label
|
||||
commonLabels []prompbmarshal.Label
|
||||
metricGroupBuf []byte
|
||||
buf []byte
|
||||
}
|
||||
@@ -160,15 +157,25 @@ func (ctx *pushCtx) reset() {
|
||||
}
|
||||
|
||||
func getPushCtx() *pushCtx {
|
||||
if v := pushCtxPool.Get(); v != nil {
|
||||
return v.(*pushCtx)
|
||||
select {
|
||||
case ctx := <-pushCtxPoolCh:
|
||||
return ctx
|
||||
default:
|
||||
if v := pushCtxPool.Get(); v != nil {
|
||||
return v.(*pushCtx)
|
||||
}
|
||||
return &pushCtx{}
|
||||
}
|
||||
return &pushCtx{}
|
||||
}
|
||||
|
||||
func putPushCtx(ctx *pushCtx) {
|
||||
ctx.reset()
|
||||
pushCtxPool.Put(ctx)
|
||||
select {
|
||||
case pushCtxPoolCh <- ctx:
|
||||
default:
|
||||
pushCtxPool.Put(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
var pushCtxPool sync.Pool
|
||||
var pushCtxPoolCh = make(chan *pushCtx, cgroup.AvailableCPUs())
|
||||
|
||||
@@ -8,19 +8,14 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogsketches"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/newrelic"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentelemetry"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/prometheusimport"
|
||||
@@ -30,11 +25,10 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/influxutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/influxutils"
|
||||
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
|
||||
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
|
||||
opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
|
||||
@@ -42,20 +36,17 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "TCP address to listen for incoming http requests. "+
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . "+
|
||||
"See also -influxListenAddr.useProxyProtocol")
|
||||
@@ -65,23 +56,19 @@ var (
|
||||
"See also -graphiteListenAddr.useProxyProtocol")
|
||||
graphiteUseProxyProtocol = flag.Bool("graphiteListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -graphiteListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB metrics. "+
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol")
|
||||
opentsdbUseProxyProtocol = flag.Bool("opentsdbListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
|
||||
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented")
|
||||
maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented")
|
||||
maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -98,15 +85,6 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// vmagent is optimized for reduced memory allocations,
|
||||
// so it can run with the reduced GOGC in order to reduce the used memory,
|
||||
// while keeping CPU usage spent in GC at low levels.
|
||||
//
|
||||
// Some workloads may need increased GOGC values. Then such values can be set via GOGC environment variable.
|
||||
// It is recommended increasing GOGC if go_memstats_gc_cpu_fraction metric exposed at /metrics page
|
||||
// exceeds 0.05 for extended periods of time.
|
||||
cgroup.SetGOGC(50)
|
||||
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
@@ -114,41 +92,33 @@ func main() {
|
||||
remotewrite.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
|
||||
pushmetrics.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exiting with 0 status code")
|
||||
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
|
||||
return
|
||||
}
|
||||
if *dryRun {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
if err := remotewrite.CheckRelabelConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking relabel configs: %s", err)
|
||||
}
|
||||
if err := remotewrite.CheckStreamAggrConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking -streamAggr.config and -remoteWrite.streamAggr.config: %s", err)
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("all the configs are ok; exiting with 0 status code")
|
||||
logger.Infof("all the configs are ok; exitting with 0 status code")
|
||||
return
|
||||
}
|
||||
|
||||
listenAddrs := *httpListenAddrs
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":8429"}
|
||||
}
|
||||
logger.Infof("starting vmagent at %q...", listenAddrs)
|
||||
logger.Infof("starting vmagent at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
remotewrite.StartIngestionRateLimiter()
|
||||
remotewrite.Init()
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
common.StartUnmarshalWorkers()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(nil, r, "")
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
@@ -163,25 +133,24 @@ func main() {
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
|
||||
promscrape.Init(remotewrite.Push)
|
||||
|
||||
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{
|
||||
UseProxyProtocol: useProxyProtocol,
|
||||
})
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
}
|
||||
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
remotewrite.StopIngestionRateLimiter()
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
logger.Infof("gracefully shutting down webservice at %q", listenAddrs)
|
||||
if err := httpserver.Stop(listenAddrs); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
if len(*httpListenAddr) > 0 {
|
||||
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
}
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
promscrape.Stop()
|
||||
|
||||
@@ -197,7 +166,7 @@ func main() {
|
||||
if len(*opentsdbHTTPListenAddr) > 0 {
|
||||
opentsdbhttpServer.MustStop()
|
||||
}
|
||||
protoparserutil.StopUnmarshalWorkers()
|
||||
common.StopUnmarshalWorkers()
|
||||
remotewrite.Stop()
|
||||
|
||||
logger.Infof("successfully stopped vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
@@ -206,7 +175,7 @@ func main() {
|
||||
func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
|
||||
if !remotewrite.MultitenancyEnabled() {
|
||||
return func(req *http.Request) error {
|
||||
path := strings.ReplaceAll(req.URL.Path, "//", "/")
|
||||
path := strings.Replace(req.URL.Path, "//", "/", -1)
|
||||
if path != "/api/put" {
|
||||
return fmt.Errorf("unsupported path requested: %q; expecting '/api/put'", path)
|
||||
}
|
||||
@@ -214,7 +183,7 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
|
||||
}
|
||||
}
|
||||
return func(req *http.Request) error {
|
||||
path := strings.ReplaceAll(req.URL.Path, "//", "/")
|
||||
path := strings.Replace(req.URL.Path, "//", "/", -1)
|
||||
at, err := getAuthTokenFromPath(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err)
|
||||
@@ -234,17 +203,17 @@ func getAuthTokenFromPath(path string) (*auth.Token, error) {
|
||||
if p.Suffix != "opentsdb/api/put" {
|
||||
return nil, fmt.Errorf("unsupported path requested: %q; expecting 'opentsdb/api/put'", p.Suffix)
|
||||
}
|
||||
return auth.NewTokenPossibleMultitenant(p.AuthToken)
|
||||
return auth.NewToken(p.AuthToken)
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
if r.Method != "GET" {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/vmagent.html'>https://docs.victoriametrics.com/vmagent.html</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
{"targets", "status for discovered active targets"},
|
||||
@@ -259,7 +228,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
path := strings.ReplaceAll(r.URL.Path, "//", "/")
|
||||
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
||||
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus") || strings.HasPrefix(path, "/api/v1/import/prometheus") {
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(nil, r); err != nil {
|
||||
@@ -277,16 +246,13 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.WriteHeader(statusCode)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/datadog/") {
|
||||
if strings.HasPrefix(path, "datadog/") {
|
||||
// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
|
||||
path = strings.TrimSuffix(path, "/")
|
||||
}
|
||||
switch path {
|
||||
case "/prometheus/api/v1/write", "/api/v1/write", "/api/v1/push", "/prometheus/api/v1/push":
|
||||
if protoparserutil.HandleVMProtoServerHandshake(w, r) {
|
||||
return true
|
||||
}
|
||||
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(nil, r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
@@ -333,59 +299,12 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/influx/query", "/query":
|
||||
influxQueryRequests.Inc()
|
||||
influxutil.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "/influx/health":
|
||||
influxHealthRequests.Inc()
|
||||
influxutil.WriteHealthCheckResponse(w)
|
||||
return true
|
||||
case "/opentelemetry/api/v1/push", "/opentelemetry/v1/metrics":
|
||||
opentelemetryPushRequests.Inc()
|
||||
if err := opentelemetry.InsertHandler(nil, r); err != nil {
|
||||
opentelemetryPushErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "/newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/newrelic/inventory/deltas":
|
||||
newrelicInventoryRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
|
||||
return true
|
||||
case "/newrelic/infra/v2/metrics/events/bulk":
|
||||
newrelicWriteRequests.Inc()
|
||||
if err := newrelic.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
newrelicWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "/datadog/api/v1/series":
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -394,15 +313,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/beta/sketches":
|
||||
datadogsketchesWriteRequests.Inc()
|
||||
if err := datadogsketches.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogsketchesWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
return true
|
||||
case "/datadog/api/v1/validate":
|
||||
datadogValidateRequests.Inc()
|
||||
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
|
||||
@@ -445,10 +355,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/prometheus/api/v1/targets", "/api/v1/targets":
|
||||
promscrapeAPIV1TargetsRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
// https://prometheus.io/docs/prometheus/latest/querying/api/#targets
|
||||
state := r.FormValue("state")
|
||||
scrapePool := r.FormValue("scrapePool")
|
||||
promscrape.WriteAPIV1Targets(w, state, scrapePool)
|
||||
promscrape.WriteAPIV1Targets(w, state)
|
||||
return true
|
||||
case "/prometheus/target_response", "/target_response":
|
||||
promscrapeTargetResponseRequests.Inc()
|
||||
@@ -459,7 +367,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/prometheus/config", "/config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigRequests.Inc()
|
||||
@@ -468,25 +376,22 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeStatusConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
var bb bytesutil.ByteBuffer
|
||||
promscrape.WriteConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%q}}`, bb.B)
|
||||
return true
|
||||
case "/prometheus/-/reload", "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigReloadRequests.Inc()
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/ready":
|
||||
if rdy := promscrape.PendingScrapeConfigs.Load(); rdy > 0 {
|
||||
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
|
||||
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
|
||||
http.Error(w, errMsg, http.StatusTooEarly)
|
||||
} else {
|
||||
@@ -517,7 +422,7 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
httpserver.Errorf(w, r, `unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
|
||||
return true
|
||||
}
|
||||
at, err := auth.NewTokenPossibleMultitenant(p.AuthToken)
|
||||
at, err := auth.NewToken(p.AuthToken)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot obtain auth token: %s", err)
|
||||
return true
|
||||
@@ -529,13 +434,7 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
statusCode := http.StatusNoContent
|
||||
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/import/prometheus/metrics/job/") {
|
||||
// Return 200 status code for pushgateway requests.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
|
||||
statusCode = http.StatusOK
|
||||
}
|
||||
w.WriteHeader(statusCode)
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(p.Suffix, "datadog/") {
|
||||
@@ -544,7 +443,7 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
p.Suffix = strings.TrimSuffix(p.Suffix, "/")
|
||||
}
|
||||
switch p.Suffix {
|
||||
case "prometheus/", "prometheus", "prometheus/api/v1/write", "prometheus/api/v1/push":
|
||||
case "prometheus/", "prometheus", "prometheus/api/v1/write":
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(at, r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
@@ -591,58 +490,12 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
return true
|
||||
case "influx/query":
|
||||
influxQueryRequests.Inc()
|
||||
influxutil.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "influx/health":
|
||||
influxHealthRequests.Inc()
|
||||
influxutil.WriteHealthCheckResponse(w)
|
||||
return true
|
||||
case "opentelemetry/api/v1/push", "opentelemetry/v1/metrics":
|
||||
opentelemetryPushRequests.Inc()
|
||||
if err := opentelemetry.InsertHandler(at, r); err != nil {
|
||||
opentelemetryPushErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "newrelic/inventory/deltas":
|
||||
newrelicInventoryRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
|
||||
return true
|
||||
case "newrelic/infra/v2/metrics/events/bulk":
|
||||
newrelicWriteRequests.Inc()
|
||||
if err := newrelic.InsertHandlerForHTTP(at, r); err != nil {
|
||||
newrelicWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "datadog/api/v1/series":
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -650,15 +503,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/beta/sketches":
|
||||
datadogsketchesWriteRequests.Inc()
|
||||
if err := datadogsketches.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogsketchesWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
return true
|
||||
case "datadog/api/v1/validate":
|
||||
datadogValidateRequests.Inc()
|
||||
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
|
||||
@@ -707,32 +551,16 @@ var (
|
||||
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/write", protocol="influx"}`)
|
||||
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/influx/write", protocol="influx"}`)
|
||||
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
|
||||
influxHealthRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/health", protocol="influx"}`)
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
|
||||
|
||||
datadogv1WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogv2WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
datadogv2WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
|
||||
datadogsketchesWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/beta/sketches", protocol="datadog"}`)
|
||||
datadogsketchesWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/beta/sketches", protocol="datadog"}`)
|
||||
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
|
||||
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
|
||||
datadogIntakeRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/intake", protocol="datadog"}`)
|
||||
datadogMetadataRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/metadata", protocol="datadog"}`)
|
||||
|
||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
|
||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
|
||||
newrelicInventoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/inventory/deltas", protocol="newrelic"}`)
|
||||
newrelicCheckRequest = metrics.NewCounter(`vm_http_requests_total{path="/newrelic", protocol="newrelic"}`)
|
||||
|
||||
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
|
||||
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)
|
||||
|
||||
@@ -754,7 +582,7 @@ func usage() {
|
||||
const s = `
|
||||
vmagent collects metrics data via popular data ingestion protocols and routes it to VictoriaMetrics.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image=non-existing
|
||||
ARG root_image=non-existing
|
||||
FROM $certs_image AS certs
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
@@ -9,5 +9,4 @@ COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certifica
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmagent-prod"]
|
||||
ARG TARGETARCH
|
||||
ARG BINARY_SUFFIX=non-existing
|
||||
COPY vmagent-linux-${TARGETARCH}-prod${BINARY_SUFFIX} ./vmagent-prod
|
||||
COPY vmagent-linux-${TARGETARCH}-prod ./vmagent-prod
|
||||
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -25,17 +25,17 @@ var (
|
||||
//
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, encoding, func(block *stream.Block) error {
|
||||
isGzip := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, block *parser.Block, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -53,13 +53,13 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label)
|
||||
samples := ctx.Samples[:0]
|
||||
mn := &block.MetricName
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: bytesutil.ToUnsafeString(mn.MetricGroup),
|
||||
})
|
||||
for j := range mn.Tags {
|
||||
tag := &mn.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
@@ -72,20 +72,18 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompb.Label)
|
||||
}
|
||||
samplesLen := len(samples)
|
||||
for j, value := range values {
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: value,
|
||||
Timestamp: timestamps[j],
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
package newrelic
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="newrelic"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="newrelic"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="newrelic"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for NewRelic POST /infra/v2/metrics/events/bulk request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, encoding, func(rows []newrelic.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
samplesCount := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
tags := r.Tags
|
||||
srcSamples := r.Samples
|
||||
for j := range srcSamples {
|
||||
s := &srcSamples[j]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: bytesutil.ToUnsafeString(s.Name),
|
||||
})
|
||||
for k := range tags {
|
||||
t := &tags[k]
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: bytesutil.ToUnsafeString(t.Key),
|
||||
Value: bytesutil.ToUnsafeString(t.Value),
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompb.Sample{
|
||||
Value: s.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
labels = append(labels, extraLabels...)
|
||||
}
|
||||
samplesCount += len(srcSamples)
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
}
|
||||
rowsPerInsert.Update(float64(samplesCount))
|
||||
return nil
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package opentelemetry
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="opentelemetry"}`)
|
||||
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="opentelemetry"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="opentelemetry"}`)
|
||||
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="opentelemetry"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes opentelemetry metrics.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
var processBody func([]byte) ([]byte, error)
|
||||
if req.Header.Get("Content-Type") == "application/json" {
|
||||
if req.Header.Get("X-Amz-Firehose-Protocol-Version") != "" {
|
||||
processBody = firehose.ProcessRequestBody
|
||||
} else {
|
||||
return fmt.Errorf("json encoding isn't supported for opentelemetry format. Use protobuf encoding")
|
||||
}
|
||||
}
|
||||
return stream.ParseStream(req.Body, encoding, processBody, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
return insertRows(at, tss, mms, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
rowsTotal += len(ts.Samples)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, ts.Labels...)
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
samples = append(samples, ts.Samples...)
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
var metadataTotal int
|
||||
if promscrape.IsMetadataEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mm.AccountID = accountID
|
||||
mm.ProjectID = projectID
|
||||
}
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mms
|
||||
metadataTotal = len(mms)
|
||||
}
|
||||
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
metadataInserted.Add(metadataTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
metadataTenantInserted.Get(at).Add(metadataTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -5,9 +5,8 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -20,7 +19,7 @@ var (
|
||||
//
|
||||
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, insertRows)
|
||||
return parser.ParseStream(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
@@ -33,22 +32,22 @@ func insertRows(rows []parser.Row) error {
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
@@ -56,9 +55,7 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -6,10 +6,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -21,16 +20,16 @@ var (
|
||||
// InsertHandler processes HTTP OpenTSDB put requests.
|
||||
// See http://opentsdb.net/docs/build/html/api_http/put.html
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return stream.Parse(req, func(rows []opentsdbhttp.Row) error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -40,23 +39,23 @@ func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.La
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
@@ -64,9 +63,7 @@ func insertRows(at *auth.Token, rows []opentsdbhttp.Row, extraLabels []prompb.La
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -7,103 +7,75 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
|
||||
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="prometheus"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="prometheus"}`)
|
||||
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="prometheus"}`)
|
||||
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="prometheus"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes `/api/v1/import/prometheus` request.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defaultTimestamp, err := protoparserutil.GetTimestamp(req)
|
||||
defaultTimestamp, err := parserCommon.GetTimestamp(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
||||
return insertRows(at, rows, mms, extraLabels)
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, func(s string) {
|
||||
httpserver.LogError(req, s)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
mmsDst := ctx.WriteRequest.Metadata[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
MetricFamilyName: mm.Metric,
|
||||
Help: mm.Help,
|
||||
Type: mm.Type,
|
||||
// there is no unit in Prometheus exposition formats
|
||||
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.WriteRequest.Metadata = mmsDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
metadataInserted.Add(len(mms))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
metadataTenantInserted.Get(at).Add(len(mms))
|
||||
}
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -23,14 +23,14 @@ var (
|
||||
func TestInsertHandler(t *testing.T) {
|
||||
setUp()
|
||||
defer tearDown()
|
||||
req := httptest.NewRequest(http.MethodPost, "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
|
||||
req := httptest.NewRequest("POST", "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
|
||||
go_memstats_alloc_bytes_total 1`))
|
||||
if err := InsertHandler(nil, req); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
t.Errorf("unxepected error %s", err)
|
||||
}
|
||||
expectedMsg := "cannot unmarshal Prometheus line"
|
||||
if !strings.Contains(testOutput.String(), expectedMsg) {
|
||||
t.Fatalf("output %q should contain %q", testOutput.String(), expectedMsg)
|
||||
t.Errorf("output %q should contain %q", testOutput.String(), expectedMsg)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,17 +44,17 @@ func setUp() {
|
||||
log.Fatalf("unable to set %q with value %q, err: %v", remoteWriteFlag, srv.URL, err)
|
||||
}
|
||||
logger.Init()
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
common.StartUnmarshalWorkers()
|
||||
remotewrite.Init()
|
||||
testOutput = &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
}
|
||||
|
||||
func tearDown() {
|
||||
protoparserutil.StopUnmarshalWorkers()
|
||||
common.StopUnmarshalWorkers()
|
||||
srv.Close()
|
||||
logger.ResetOutputForTest()
|
||||
tmpDataDir := flag.Lookup("remoteWrite.tmpDataPath").Value.String()
|
||||
fs.MustRemoveDir(tmpDataDir)
|
||||
fs.MustRemoveAll(tmpDataDir)
|
||||
|
||||
}
|
||||
|
||||
@@ -6,41 +6,38 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="promremotewrite"}`)
|
||||
metadataInserted = metrics.NewCounter(`vmagent_metadata_inserted_total{type="promremotewrite"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="promremotewrite"}`)
|
||||
metadataTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_metadata_total{type="promremotewrite"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="promremotewrite"}`)
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="promremotewrite"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="promremotewrite"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="promremotewrite"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes remote write for prometheus.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isVMRemoteWrite := req.Header.Get("Content-Encoding") == "zstd"
|
||||
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
return insertRows(at, tss, mms, extraLabels)
|
||||
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
mmsDst := ctx.WriteRequest.Metadata[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range timeseries {
|
||||
@@ -49,61 +46,33 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
labelsLen := len(labels)
|
||||
for i := range ts.Labels {
|
||||
label := &ts.Labels[i]
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(label.Name),
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for i := range ts.Samples {
|
||||
sample := &ts.Samples[i]
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: sample.Value,
|
||||
Timestamp: sample.Timestamp,
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
var metadataTotal int
|
||||
if promscrape.IsMetadataEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
MetricFamilyName: mm.MetricFamilyName,
|
||||
Help: mm.Help,
|
||||
Type: mm.Type,
|
||||
Unit: mm.Unit,
|
||||
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mmsDst
|
||||
metadataTotal = len(mms)
|
||||
}
|
||||
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
metadataTenantInserted.Get(at).Add(metadataTotal)
|
||||
}
|
||||
metadataInserted.Add(metadataTotal)
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,58 +2,39 @@ package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var (
|
||||
forcePromProto = flagutil.NewArrayBool("remoteWrite.forcePromProto", "Whether to force Prometheus remote write protocol for sending data "+
|
||||
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
|
||||
forceVMProto = flagutil.NewArrayBool("remoteWrite.forceVMProto", "Whether to force VictoriaMetrics remote write protocol for sending data "+
|
||||
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
|
||||
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage. See also -maxIngestionRate")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
|
||||
// deprecated in the future. use -remoteWrite.retryMaxInterval instead
|
||||
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. This flag is deprecated, use -remoteWrite.retryMaxInterval instead")
|
||||
retryMaxInterval = flagutil.NewArrayDuration("remoteWrite.retryMaxInterval", time.Minute, "The maximum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. The delay doubles with each retry until this maximum is reached, after which it remains constant. See also -remoteWrite.retryMinInterval")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
|
||||
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url")
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArrayString("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
|
||||
"to the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, system CA is used")
|
||||
"By default system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
|
||||
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
|
||||
@@ -70,10 +51,8 @@ var (
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
|
||||
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
|
||||
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
|
||||
@@ -90,22 +69,14 @@ var (
|
||||
type client struct {
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
|
||||
// Whether to use VictoriaMetrics remote write protocol for sending the data to remoteWriteURL
|
||||
useVMProto atomic.Bool
|
||||
canDowngradeVMProto atomic.Bool
|
||||
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
retryMinInterval time.Duration
|
||||
retryMaxInterval time.Duration
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
awsCfg *awsapi.Config
|
||||
|
||||
rl *ratelimiter.RateLimiter
|
||||
rl rateLimiter
|
||||
|
||||
bytesSent *metrics.Counter
|
||||
blocksSent *metrics.Counter
|
||||
@@ -124,20 +95,22 @@ type client struct {
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg := authCfg.NewTLSConfig()
|
||||
awsCfg, err := getAWSAPIConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
DialContext: statDial,
|
||||
TLSClientConfig: tlsCfg,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
MaxConnsPerHost: 2 * concurrency,
|
||||
MaxIdleConnsPerHost: 2 * concurrency,
|
||||
IdleConnTimeout: time.Minute,
|
||||
WriteBufferSize: 64 * 1024,
|
||||
}
|
||||
|
||||
tr := httputil.NewTransport(false, "vmagent_remotewrite")
|
||||
tr.TLSHandshakeTimeout = tlsHandshakeTimeout.GetOptionalArg(argIdx)
|
||||
tr.MaxConnsPerHost = 2 * concurrency
|
||||
tr.MaxIdleConnsPerHost = 2 * concurrency
|
||||
tr.IdleConnTimeout = time.Minute
|
||||
tr.WriteBufferSize = 64 * 1024
|
||||
|
||||
pURL := proxyURL.GetOptionalArg(argIdx)
|
||||
if len(pURL) > 0 {
|
||||
if !strings.Contains(pURL, "://") {
|
||||
@@ -149,52 +122,33 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(pu)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: authCfg.NewRoundTripper(tr),
|
||||
Timeout: sendTimeout.GetOptionalArg(argIdx),
|
||||
}
|
||||
retryMaxIntervalFlag := retryMaxTime
|
||||
if retryMaxInterval.String() != "" {
|
||||
retryMaxIntervalFlag = retryMaxInterval
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
awsCfg: awsCfg,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
|
||||
retryMaxInterval: retryMaxIntervalFlag.GetOptionalArg(argIdx),
|
||||
stopCh: make(chan struct{}),
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
awsCfg: awsCfg,
|
||||
fq: fq,
|
||||
hc: &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
|
||||
useVMProto := forceVMProto.GetOptionalArg(argIdx)
|
||||
usePromProto := forcePromProto.GetOptionalArg(argIdx)
|
||||
if useVMProto && usePromProto {
|
||||
logger.Fatalf("-remoteWrite.useVMProto and -remoteWrite.usePromProto cannot be set simultaneously for -remoteWrite.url=%s", sanitizedURL)
|
||||
}
|
||||
if !useVMProto && !usePromProto {
|
||||
// The VM protocol could be downgraded later at runtime if unsupported media type response status is received.
|
||||
useVMProto = true
|
||||
c.canDowngradeVMProto.Store(true)
|
||||
}
|
||||
c.useVMProto.Store(useVMProto)
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
|
||||
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
c.rl = ratelimiter.New(int64(bytesPerSec), limitReached, c.stopCh)
|
||||
c.rl.perSecondLimit = int64(bytesPerSec)
|
||||
}
|
||||
c.rl.limitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
|
||||
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(rateLimit.GetOptionalArg(argIdx))
|
||||
return float64(rateLimit.GetOptionalArgOrDefault(argIdx, 0))
|
||||
})
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
|
||||
@@ -246,16 +200,10 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
@@ -279,7 +227,7 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
}
|
||||
authCfg, err := opts.NewConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate auth config for remoteWrite idx: %d, err: %w", argIdx, err)
|
||||
return nil, fmt.Errorf("cannot populate OAuth2 config for remoteWrite idx: %d, err: %w", argIdx, err)
|
||||
}
|
||||
return authCfg, nil
|
||||
}
|
||||
@@ -311,11 +259,6 @@ func (c *client) runWorker() {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if len(block) == 0 {
|
||||
// skip empty data blocks from sending
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241
|
||||
continue
|
||||
}
|
||||
go func() {
|
||||
startTime := time.Now()
|
||||
ch <- c.sendBlock(block)
|
||||
@@ -328,7 +271,7 @@ func (c *client) runWorker() {
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
@@ -337,94 +280,57 @@ func (c *client) runWorker() {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
req, err := c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// sendBlockHTTP returns false only if c.stopCh is closed.
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
retriesCount := 0
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
sigv4Hash := ""
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash = awsapi.HashHex(block)
|
||||
}
|
||||
resp, err := c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
}
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
reqBody := bytes.NewBuffer(body)
|
||||
req, err := http.NewRequest(http.MethodPost, url, reqBody)
|
||||
again:
|
||||
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
|
||||
}
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.sanitizedURL, err)
|
||||
}
|
||||
c.authCfg.SetHeaders(req, true)
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
if encoding.IsZstd(body) {
|
||||
h.Set("Content-Encoding", "zstd")
|
||||
h.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
|
||||
} else {
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
}
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash := awsapi.HashHex(body)
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
|
||||
}
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
//
|
||||
// The function returns false only if c.stopCh is closed.
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
startTime := time.Now()
|
||||
resp, err := c.doRequest(c.remoteWriteURL, block)
|
||||
resp, err := c.hc.Do(req)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
}
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
logger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
@@ -437,59 +343,27 @@ again:
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
statusCode := resp.StatusCode
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
return true
|
||||
}
|
||||
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
switch statusCode {
|
||||
case 409:
|
||||
logBlockRejected(block, c.sanitizedURL, resp)
|
||||
|
||||
// Just drop block on 409 status code like Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149
|
||||
if statusCode == 409 || statusCode == 400 {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
c.packetsDropped.Inc()
|
||||
return true
|
||||
// - Remote Write v1 specification implicitly expects a `400 Bad Request` when the encoding is not supported.
|
||||
// - Remote Write v2 specification explicitly specifies a `415 Unsupported Media Type` for unsupported encodings.
|
||||
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
||||
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
||||
case 415, 400:
|
||||
if encoding.IsZstd(block) {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||
|
||||
zstdBlockLen := len(block)
|
||||
block, err = repackBlockFromZstdToSnappy(block)
|
||||
if err == nil {
|
||||
if c.canDowngradeVMProto.Swap(false) {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||
c.useVMProto.Store(false)
|
||||
}
|
||||
|
||||
c.retriesCount.Inc()
|
||||
_ = resp.Body.Close()
|
||||
goto again
|
||||
}
|
||||
|
||||
logger.Warnf("failed to repack zstd block (%s bytes) to snappy: %s; The block will be rejected. "+
|
||||
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
|
||||
zstdBlockLen, err)
|
||||
if err != nil {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
|
||||
"failed to read response body: %s",
|
||||
len(block), c.sanitizedURL, statusCode, err)
|
||||
} else {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
|
||||
len(block), c.sanitizedURL, statusCode, string(body))
|
||||
}
|
||||
|
||||
// Just drop snappy blocks on 400 or 415 status codes like Prometheus does.
|
||||
// Just drop block on 409 and 400 status codes like Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149
|
||||
logBlockRejected(block, c.sanitizedURL, resp)
|
||||
_ = resp.Body.Close()
|
||||
c.packetsDropped.Inc()
|
||||
return true
|
||||
@@ -497,10 +371,10 @@ again:
|
||||
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||
|
||||
// Handle response
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
@@ -522,79 +396,45 @@ again:
|
||||
}
|
||||
|
||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||
|
||||
// getRetryDuration returns retry duration.
|
||||
// retryAfterDuration has the highest priority.
|
||||
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||
// retryDuration can't exceed maxRetryDuration.
|
||||
//
|
||||
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterDuration > 0 {
|
||||
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||
}
|
||||
type rateLimiter struct {
|
||||
perSecondLimit int64
|
||||
|
||||
// default backoff retry policy
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
// mu protects budget and deadline from concurrent access.
|
||||
mu sync.Mutex
|
||||
|
||||
return retryDuration
|
||||
// The current budget. It is increased by perSecondLimit every second.
|
||||
budget int64
|
||||
|
||||
// The next deadline for increasing the budget by perSecondLimit
|
||||
deadline time.Time
|
||||
|
||||
limitReached *metrics.Counter
|
||||
}
|
||||
|
||||
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
||||
//
|
||||
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
|
||||
// failed to properly update the persisted queue files. In such cases, zstd decompression
|
||||
// will fail and an error will be returned.
|
||||
//
|
||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||
func (rl *rateLimiter) register(dataLen int, stopCh <-chan struct{}) {
|
||||
limit := rl.perSecondLimit
|
||||
if limit <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
return snappy.Encode(nil, plainBlock), nil
|
||||
}
|
||||
|
||||
func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
|
||||
"failed to read response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, err)
|
||||
} else {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, string(body))
|
||||
}
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||
if retryAfterString == "" {
|
||||
return retryAfterDuration
|
||||
}
|
||||
|
||||
defer func() {
|
||||
v := retryAfterDuration.Seconds()
|
||||
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||
}()
|
||||
|
||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||
}
|
||||
// Retry-After could be in seconds.
|
||||
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
return 0
|
||||
rl.mu.Lock()
|
||||
defer rl.mu.Unlock()
|
||||
|
||||
for rl.budget <= 0 {
|
||||
if d := time.Until(rl.deadline); d > 0 {
|
||||
rl.limitReached.Inc()
|
||||
t := timerpool.Get(d)
|
||||
select {
|
||||
case <-stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
}
|
||||
rl.budget += limit
|
||||
rl.deadline = time.Now().Add(time.Second)
|
||||
}
|
||||
rl.budget -= int64(dataLen)
|
||||
}
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"math"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestCalculateRetryDuration(t *testing.T) {
|
||||
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||
// and evaluate if the result of `calculateRetryDuration` is
|
||||
// 1. >= expectMinDuration
|
||||
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
expectMaxDuration := helper(expectMinDuration)
|
||||
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||
|
||||
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
|
||||
t.Fatalf(
|
||||
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||
retryDuration.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for 1 time.
|
||||
{
|
||||
// default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
// default backoff policy exceed max limit"
|
||||
f(0, 10*time.Minute, 1, time.Minute)
|
||||
|
||||
// retry after > default backoff policy
|
||||
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||
// retry after < default backoff policy
|
||||
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||
// retry after invalid and < default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for multiple times.
|
||||
{
|
||||
// default backoff policy 2 times
|
||||
f(0, time.Second, 2, 4*time.Second)
|
||||
// default backoff policy 3 times
|
||||
f(0, time.Second, 3, 8*time.Second)
|
||||
// default backoff policy N times exceed max limit
|
||||
f(0, time.Second, 10, time.Minute)
|
||||
|
||||
// retry after 120s 1 times
|
||||
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||
// retry after 120s 2 times
|
||||
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f := func(retryAfterString string, expectResult time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
result := parseRetryAfterHeader(retryAfterString)
|
||||
// expect `expectResult == result` when retryAfterString is in seconds or invalid
|
||||
// expect the difference between result and expectResult to be lower than 10%
|
||||
if !(expectResult == result || math.Abs(float64(expectResult-result))/float64(expectResult) < 0.10) {
|
||||
t.Fatalf(
|
||||
"incorrect retry after duration, want (ms): %d, got (ms): %d",
|
||||
expectResult.Milliseconds(), result.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// retry after header in seconds
|
||||
f("10", 10*time.Second)
|
||||
// retry after header in date time
|
||||
f(time.Now().Add(30*time.Second).UTC().Format(http.TimeFormat), 30*time.Second)
|
||||
// retry after header invalid
|
||||
f("invalid-retry-after", 0)
|
||||
// retry after header not in GMT
|
||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||
}
|
||||
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := d / 10
|
||||
if dv > 10*time.Second {
|
||||
dv = 10 * time.Second
|
||||
}
|
||||
|
||||
return d + dv
|
||||
}
|
||||
|
||||
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
|
||||
expectedPlainBlock := []byte(`foobar`)
|
||||
|
||||
zstdBlock := encoding.CompressZSTDLevel(nil, expectedPlainBlock, 1)
|
||||
snappyBlock, err := repackBlockFromZstdToSnappy(zstdBlock)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
actualPlainBlock, err := snappy.Decode(nil, snappyBlock)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if string(actualPlainBlock) != string(expectedPlainBlock) {
|
||||
t.Fatalf("unexpected plain block; got %q; want %q", actualPlainBlock, expectedPlainBlock)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepackBlockFromZstdToSnappyInvalidBlock(t *testing.T) {
|
||||
snappyBlock, err := repackBlockFromZstdToSnappy([]byte("invalid zstd block"))
|
||||
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for invalid zstd block; got nil")
|
||||
}
|
||||
if len(snappyBlock) != 0 {
|
||||
t.Fatalf("expected empty snappy block; got %d bytes", len(snappyBlock))
|
||||
}
|
||||
}
|
||||
@@ -7,30 +7,22 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var (
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"This option takes effect only when less than -remoteWrite.maxRowsPerBlock data points per -remoteWrite.flushInterval are pushed to -remoteWrite.url")
|
||||
"This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url")
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock")
|
||||
maxRowsPerBlock = flag.Int("remoteWrite.maxRowsPerBlock", 10000, "The maximum number of samples to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
|
||||
maxMetadataPerBlock = flag.Int("remoteWrite.maxMetadataPerBlock", 5000, "The maximum number of metadata to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
|
||||
vmProtoCompressLevel = flag.Int("remoteWrite.vmProtoCompressLevel", 0, "The compression level for VictoriaMetrics remote write protocol. "+
|
||||
"Higher values reduce network traffic at the cost of higher CPU usage. Negative values reduce CPU usage at the cost of increased network traffic. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol")
|
||||
)
|
||||
|
||||
type pendingSeries struct {
|
||||
@@ -41,10 +33,9 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.fq = fq
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
@@ -61,18 +52,10 @@ func (ps *pendingSeries) MustStop() {
|
||||
ps.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) TryPushTimeSeries(tss []prompb.TimeSeries) bool {
|
||||
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
ps.mu.Lock()
|
||||
ok := ps.wr.tryPushTimeSeries(tss)
|
||||
ps.wr.push(tss)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) TryPushMetadata(mms []prompb.MetricMetadata) bool {
|
||||
ps.mu.Lock()
|
||||
ok := ps.wr.tryPushMetadata(mms)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
@@ -80,111 +63,82 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
d := timeutil.AddJitterToDuration(*flushInterval)
|
||||
ticker := time.NewTicker(d)
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
mustStop := false
|
||||
for !mustStop {
|
||||
select {
|
||||
case <-ps.stopCh:
|
||||
ps.mu.Lock()
|
||||
ps.wr.mustFlushOnStop()
|
||||
ps.mu.Unlock()
|
||||
return
|
||||
mustStop = true
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-ps.wr.lastFlushTime.Load() < uint64(flushSeconds) {
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ps.mu.Lock()
|
||||
_ = ps.wr.tryFlush()
|
||||
ps.wr.flush()
|
||||
ps.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
type writeRequest struct {
|
||||
lastFlushTime atomic.Uint64
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite *atomic.Bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to fq.
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to fq.
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
roundDigits int
|
||||
|
||||
wr prompb.WriteRequest
|
||||
wr prompbmarshal.WriteRequest
|
||||
|
||||
tss []prompb.TimeSeries
|
||||
mms []prompb.MetricMetadata
|
||||
labels []prompb.Label
|
||||
samples []prompb.Sample
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
// buf holds labels data
|
||||
buf []byte
|
||||
// metadatabuf holds metadata data
|
||||
metadatabuf []byte
|
||||
labels []prompbmarshal.Label
|
||||
samples []prompbmarshal.Sample
|
||||
buf []byte
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are reused.
|
||||
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
wr.wr.Metadata = nil
|
||||
|
||||
clear(wr.tss)
|
||||
for i := range wr.tss {
|
||||
ts := &wr.tss[i]
|
||||
ts.Labels = nil
|
||||
ts.Samples = nil
|
||||
}
|
||||
wr.tss = wr.tss[:0]
|
||||
|
||||
clear(wr.mms)
|
||||
wr.mms = wr.mms[:0]
|
||||
|
||||
promrelabel.CleanLabels(wr.labels)
|
||||
wr.labels = wr.labels[:0]
|
||||
|
||||
wr.samples = wr.samples[:0]
|
||||
wr.buf = wr.buf[:0]
|
||||
wr.metadatabuf = wr.metadatabuf[:0]
|
||||
}
|
||||
|
||||
// mustFlushOnStop force pushes wr data into wr.fq
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (wr *writeRequest) mustFlushOnStop() {
|
||||
func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.wr.Metadata = wr.mms
|
||||
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite.Load()) {
|
||||
logger.Panicf("BUG: final flush must always return true")
|
||||
}
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
|
||||
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryFlush() bool {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.wr.Metadata = wr.mms
|
||||
wr.lastFlushTime.Store(fasttime.UnixTimestamp())
|
||||
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite.Load()) {
|
||||
return false
|
||||
}
|
||||
wr.reset()
|
||||
return true
|
||||
}
|
||||
|
||||
func adjustSampleValues(samples []prompb.Sample, significantFigures, roundDigits int) {
|
||||
if n := significantFigures; n > 0 {
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := roundDigits; n < 100 {
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
@@ -192,236 +146,100 @@ func adjustSampleValues(samples []prompb.Sample, significantFigures, roundDigits
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryPushMetadata(mms []prompb.MetricMetadata) bool {
|
||||
mmdDst := wr.mms
|
||||
maxMetadataPerBlock := *maxMetadataPerBlock
|
||||
for i := range mms {
|
||||
if len(wr.mms) >= maxMetadataPerBlock {
|
||||
if !wr.tryFlush() {
|
||||
return false
|
||||
}
|
||||
mmdDst = wr.mms
|
||||
}
|
||||
mmSrc := &mms[i]
|
||||
mmdDst = append(mmdDst, prompb.MetricMetadata{})
|
||||
wr.copyMetadata(&mmdDst[len(mmdDst)-1], mmSrc)
|
||||
}
|
||||
wr.mms = mmdDst
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
|
||||
// Direct copy for non-string fields, which are safe by value.
|
||||
dst.Type = src.Type
|
||||
dst.Unit = src.Unit
|
||||
|
||||
// Pre-allocate memory for all string fields.
|
||||
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
|
||||
bufLen := len(wr.metadatabuf)
|
||||
wr.metadatabuf = slicesutil.SetLength(wr.metadatabuf, bufLen+neededBufLen)
|
||||
buf := wr.metadatabuf[:bufLen]
|
||||
|
||||
// Copy MetricFamilyName
|
||||
bufLen = len(buf)
|
||||
buf = append(buf, src.MetricFamilyName...)
|
||||
dst.MetricFamilyName = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
// Copy Help
|
||||
bufLen = len(buf)
|
||||
buf = append(buf, src.Help...)
|
||||
dst.Help = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
wr.metadatabuf = buf
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryPushTimeSeries(src []prompb.TimeSeries) bool {
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
tssDst := wr.tss
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
|
||||
wr.tss = tssDst
|
||||
if !wr.tryFlush() {
|
||||
return false
|
||||
}
|
||||
wr.flush()
|
||||
tssDst = wr.tss
|
||||
}
|
||||
tsSrc := &src[i]
|
||||
adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
|
||||
tssDst = append(tssDst, prompb.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
|
||||
}
|
||||
|
||||
wr.tss = tssDst
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) copyTimeSeries(dst, src *prompb.TimeSeries) {
|
||||
labelsSrc := src.Labels
|
||||
|
||||
// Pre-allocate memory for labels.
|
||||
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
labelsDst := wr.labels
|
||||
labelsLen := len(wr.labels)
|
||||
wr.labels = slicesutil.SetLength(wr.labels, labelsLen+len(labelsSrc))
|
||||
labelsDst := wr.labels[labelsLen:]
|
||||
samplesDst := wr.samples
|
||||
buf := wr.buf
|
||||
for i := range src.Labels {
|
||||
labelsDst = append(labelsDst, prompbmarshal.Label{})
|
||||
dstLabel := &labelsDst[len(labelsDst)-1]
|
||||
srcLabel := &src.Labels[i]
|
||||
|
||||
// Pre-allocate memory for byte slice needed for storing label names and values.
|
||||
neededBufLen := 0
|
||||
for i := range labelsSrc {
|
||||
label := &labelsSrc[i]
|
||||
neededBufLen += len(label.Name) + len(label.Value)
|
||||
}
|
||||
bufLen := len(wr.buf)
|
||||
wr.buf = slicesutil.SetLength(wr.buf, bufLen+neededBufLen)
|
||||
buf := wr.buf[:bufLen]
|
||||
|
||||
// Copy labels
|
||||
for i := range labelsSrc {
|
||||
dstLabel := &labelsDst[i]
|
||||
srcLabel := &labelsSrc[i]
|
||||
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, srcLabel.Name...)
|
||||
dstLabel.Name = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
bufLen = len(buf)
|
||||
dstLabel.Name = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Name):])
|
||||
buf = append(buf, srcLabel.Value...)
|
||||
dstLabel.Value = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
dstLabel.Value = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Value):])
|
||||
}
|
||||
wr.buf = buf
|
||||
dst.Labels = labelsDst
|
||||
dst.Labels = labelsDst[labelsLen:]
|
||||
|
||||
// Copy samples
|
||||
samplesLen := len(wr.samples)
|
||||
wr.samples = append(wr.samples, src.Samples...)
|
||||
dst.Samples = wr.samples[samplesLen:]
|
||||
samplesDst = append(samplesDst, src.Samples...)
|
||||
dst.Samples = samplesDst[len(samplesDst)-len(src.Samples):]
|
||||
|
||||
wr.samples = samplesDst
|
||||
wr.labels = labelsDst
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
// marshalConcurrency limits the maximum number of concurrent workers, which marshal and compress WriteRequest.
|
||||
var marshalConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs())
|
||||
|
||||
func tryPushWriteRequest(wr *prompb.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
|
||||
if wr.IsEmpty() {
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte)) {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
marshalConcurrencyCh <- struct{}{}
|
||||
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = wr.MarshalProtobuf(bb.B[:0])
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := compressBufPool.Get()
|
||||
if isVMRemoteWrite {
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, *vmProtoCompressLevel)
|
||||
} else {
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
}
|
||||
zb := snappyBufPool.Get()
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
<-marshalConcurrencyCh
|
||||
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
zbLen := len(zb.B)
|
||||
ok := tryPushBlock(zb.B)
|
||||
compressBufPool.Put(zb)
|
||||
if ok {
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockMetadataRows.Update(float64(len(wr.Metadata)))
|
||||
blockSizeBytes.Update(float64(zbLen))
|
||||
}
|
||||
return ok
|
||||
pushBlock(zb.B)
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockSizeBytes.Update(float64(len(zb.B)))
|
||||
snappyBufPool.Put(zb)
|
||||
return
|
||||
}
|
||||
compressBufPool.Put(zb)
|
||||
snappyBufPool.Put(zb)
|
||||
} else {
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
<-marshalConcurrencyCh
|
||||
}
|
||||
|
||||
// Split timeseries or metadata into two smaller blocks
|
||||
switch len(wr.Timeseries) {
|
||||
case 0:
|
||||
if len(wr.Metadata) == 1 {
|
||||
logger.Warnf("dropping a metadata exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
|
||||
return true
|
||||
}
|
||||
metadata := wr.Metadata
|
||||
n := len(metadata) / 2
|
||||
wr.Metadata = metadata[:n]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Metadata = metadata
|
||||
return false
|
||||
}
|
||||
wr.Metadata = metadata[n:]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Metadata = metadata
|
||||
return false
|
||||
}
|
||||
wr.Metadata = metadata
|
||||
return true
|
||||
|
||||
case 1:
|
||||
// A single time series left. Recursively split its samples and metadata into smaller parts if possible.
|
||||
// Too big block. Recursively split it into smaller parts if possible.
|
||||
if len(wr.Timeseries) == 1 {
|
||||
// A single time series left. Recursively split its samples into smaller parts if possible.
|
||||
samples := wr.Timeseries[0].Samples
|
||||
metaData := wr.Metadata
|
||||
if len(samples) == 1 && len(metaData) <= 1 {
|
||||
logger.Warnf("dropping a sample for metric and %d metadata which are exceeding -remoteWrite.maxBlockSize=%d bytes", len(metaData), maxUnpackedBlockSize.N)
|
||||
return true
|
||||
if len(samples) == 1 {
|
||||
logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
|
||||
return
|
||||
}
|
||||
n := len(samples) / 2
|
||||
m := len(metaData) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
wr.Metadata = metaData[:m]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
wr.Metadata = metaData
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
wr.Metadata = metaData[m:]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
wr.Metadata = metaData
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
wr.Timeseries[0].Samples = samples
|
||||
wr.Metadata = metaData
|
||||
return true
|
||||
|
||||
default:
|
||||
// Split both timeseries and metadata.
|
||||
timeseries := wr.Timeseries
|
||||
metaData := wr.Metadata
|
||||
n := len(timeseries) / 2
|
||||
m := len(metaData) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
wr.Metadata = metaData[:m]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
wr.Metadata = metaData
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries[n:]
|
||||
wr.Metadata = metaData[m:]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
wr.Metadata = metaData
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries
|
||||
wr.Metadata = metaData
|
||||
return true
|
||||
return
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
wr.Timeseries = timeseries[n:]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
wr.Timeseries = timeseries
|
||||
}
|
||||
|
||||
var (
|
||||
blockSizeBytes = metrics.NewHistogram(`vmagent_remotewrite_block_size_bytes`)
|
||||
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
|
||||
blockMetadataRows = metrics.NewHistogram(`vmagent_remotewrite_block_metadata_rows`)
|
||||
blockSizeBytes = metrics.NewHistogram(`vmagent_remotewrite_block_size_bytes`)
|
||||
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
|
||||
)
|
||||
|
||||
var (
|
||||
writeRequestBufPool bytesutil.ByteBufferPool
|
||||
compressBufPool bytesutil.ByteBufferPool
|
||||
)
|
||||
var writeRequestBufPool bytesutil.ByteBufferPool
|
||||
var snappyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
@@ -2,66 +2,55 @@ package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestPushWriteRequest(t *testing.T) {
|
||||
rowsCounts := []int{1, 10, 100, 1e3, 1e4}
|
||||
expectedBlockLensProm := []int{216, 1848, 16424, 169882, 1757876}
|
||||
expectedBlockLensVM := []int{138, 492, 3927, 34995, 288476}
|
||||
for i, rowsCount := range rowsCounts {
|
||||
expectedBlockLenProm := expectedBlockLensProm[i]
|
||||
expectedBlockLenVM := expectedBlockLensVM[i]
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
|
||||
testPushWriteRequest(t, rowsCount, expectedBlockLenProm, expectedBlockLenVM)
|
||||
testPushWriteRequest(t, rowsCount)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expectedBlockLenVM int) {
|
||||
f := func(isVMRemoteWrite bool, expectedBlockLen int, tolerancePrc float64) {
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) bool {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
return true
|
||||
}
|
||||
if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
|
||||
t.Fatalf("cannot push data to remote storage")
|
||||
}
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
func testPushWriteRequest(t *testing.T, rowsCount int) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
b := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
zb := snappy.Encode(nil, b)
|
||||
maxPushBlockLen := len(zb)
|
||||
minPushBlockLen := maxPushBlockLen / 2
|
||||
if pushBlockLen < minPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be at least %d bytes", pushBlockLen, minPushBlockLen)
|
||||
}
|
||||
if pushBlockLen > maxPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be smaller or equal to %d bytes", pushBlockLen, maxPushBlockLen)
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 3)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
}
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
||||
var wr prompb.WriteRequest
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompbmarshal.WriteRequest {
|
||||
var wr prompbmarshal.WriteRequest
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
var labels []prompb.Label
|
||||
var labels []prompbmarshal.Label
|
||||
for j := 0; j < labelsCount; j++ {
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
})
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, prompb.TimeSeries{
|
||||
wr.Timeseries = append(wr.Timeseries, prompbmarshal.TimeSeries{
|
||||
Labels: labels,
|
||||
Samples: []prompb.Sample{
|
||||
Samples: []prompbmarshal.Sample{
|
||||
{
|
||||
Value: float64(i),
|
||||
Timestamp: 1000 * int64(i),
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/klauspost/compress/s2"
|
||||
)
|
||||
@@ -21,7 +22,7 @@ func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []by
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(rowsCount))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
|
||||
@@ -3,17 +3,13 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -22,31 +18,17 @@ var (
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/relabeling/")
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
|
||||
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/relabeling/")
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
|
||||
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
|
||||
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
|
||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||
)
|
||||
|
||||
var labelsGlobal []prompb.Label
|
||||
|
||||
var (
|
||||
relabelConfigReloads *metrics.Counter
|
||||
relabelConfigReloadErrors *metrics.Counter
|
||||
relabelConfigSuccess *metrics.Gauge
|
||||
relabelConfigTimestamp *metrics.Counter
|
||||
)
|
||||
|
||||
func initRelabelMetrics() {
|
||||
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
relabelConfigSuccess = metrics.NewGauge(`vmagent_relabel_config_last_reload_successful`, nil)
|
||||
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
}
|
||||
var labelsGlobal []prompbmarshal.Label
|
||||
|
||||
// CheckRelabelConfigs checks -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig.
|
||||
func CheckRelabelConfigs() error {
|
||||
@@ -54,39 +36,6 @@ func CheckRelabelConfigs() error {
|
||||
return err
|
||||
}
|
||||
|
||||
func initRelabelConfigs() {
|
||||
rcs, err := loadRelabelConfigs()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize relabel configs: %s", err)
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
if rcs.isSet() {
|
||||
initRelabelMetrics()
|
||||
relabelConfigSuccess.Set(1)
|
||||
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
rcs := allRelabelConfigs.Load()
|
||||
if !rcs.isSet() {
|
||||
return
|
||||
}
|
||||
relabelConfigReloads.Inc()
|
||||
logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
rcs, err := loadRelabelConfigs()
|
||||
if err != nil {
|
||||
relabelConfigReloadErrors.Inc()
|
||||
relabelConfigSuccess.Set(0)
|
||||
logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
|
||||
return
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
relabelConfigSuccess.Set(1)
|
||||
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
|
||||
logger.Infof("successfully reloaded relabel configs")
|
||||
}
|
||||
|
||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
var rcs relabelConfigs
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
@@ -96,11 +45,11 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
rcs.global = global
|
||||
}
|
||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||
if len(*relabelConfigPaths) > (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url or -remoteWrite.multitenantURL args: %d",
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)))
|
||||
}
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, (len(*remoteWriteURLs) + len(*remoteWriteMultitenantURLs)))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
// Skip empty relabel config.
|
||||
@@ -120,21 +69,6 @@ type relabelConfigs struct {
|
||||
perURL []*promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
func (rcs *relabelConfigs) isSet() bool {
|
||||
if rcs == nil {
|
||||
return false
|
||||
}
|
||||
if rcs.global.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
for _, pc := range rcs.perURL {
|
||||
if pc.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
func initLabelsGlobal() {
|
||||
labelsGlobal = nil
|
||||
@@ -146,35 +80,53 @@ func initLabelsGlobal() {
|
||||
if n < 0 {
|
||||
logger.Fatalf("missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
|
||||
}
|
||||
labelsGlobal = append(labelsGlobal, prompb.Label{
|
||||
labelsGlobal = append(labelsGlobal, prompbmarshal.Label{
|
||||
Name: s[:n],
|
||||
Value: s[n+1:],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompb.TimeSeries, pcs *promrelabel.ParsedConfigs) []prompb.TimeSeries {
|
||||
if pcs.Len() == 0 && !*usePromCompatibleNaming {
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
rctx.reset()
|
||||
tssDst := tss[:0]
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, ts.Labels...)
|
||||
// extraLabels must be added before applying relabeling according to https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
|
||||
for j := range extraLabels {
|
||||
extraLabel := &extraLabels[j]
|
||||
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
|
||||
if tmp != nil {
|
||||
tmp.Value = extraLabel.Value
|
||||
} else {
|
||||
labels = append(labels, *extraLabel)
|
||||
}
|
||||
}
|
||||
if *usePromCompatibleNaming {
|
||||
// Replace unsupported Prometheus chars in label names and metric names with underscores.
|
||||
tmpLabels := labels[labelsLen:]
|
||||
for j := range tmpLabels {
|
||||
label := &tmpLabels[j]
|
||||
if label.Name == "__name__" {
|
||||
label.Value = promrelabel.SanitizeName(label.Value)
|
||||
} else {
|
||||
label.Name = promrelabel.SanitizeName(label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
labels = pcs.Apply(labels, labelsLen)
|
||||
labels = promrelabel.FinalizeLabels(labels[:labelsLen], labels[labelsLen:])
|
||||
if len(labels) == labelsLen {
|
||||
// Drop the current time series, since relabeling removed all the labels.
|
||||
continue
|
||||
}
|
||||
if *usePromCompatibleNaming {
|
||||
fixPromCompatibleNaming(labels[labelsLen:])
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: ts.Samples,
|
||||
})
|
||||
@@ -183,61 +135,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompb.TimeSeries, pcs *promrelabe
|
||||
return tssDst
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) appendExtraLabels(tss []prompb.TimeSeries, extraLabels []prompb.Label) {
|
||||
if len(extraLabels) == 0 {
|
||||
return
|
||||
}
|
||||
rctx.reset()
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, ts.Labels...)
|
||||
for j := range extraLabels {
|
||||
extraLabel := extraLabels[j]
|
||||
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
|
||||
if tmp != nil {
|
||||
tmp.Value = extraLabel.Value
|
||||
} else {
|
||||
labels = append(labels, extraLabel)
|
||||
}
|
||||
}
|
||||
ts.Labels = labels[labelsLen:]
|
||||
}
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) tenantToLabels(tss []prompb.TimeSeries, accountID, projectID uint32) {
|
||||
rctx.reset()
|
||||
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
|
||||
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
for _, label := range ts.Labels {
|
||||
labelName := label.Name
|
||||
if labelName == "vm_account_id" || labelName == "vm_project_id" {
|
||||
continue
|
||||
}
|
||||
labels = append(labels, label)
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "vm_account_id",
|
||||
Value: accountIDStr,
|
||||
})
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "vm_project_id",
|
||||
Value: projectIDStr,
|
||||
})
|
||||
ts.Labels = labels[labelsLen:]
|
||||
}
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
type relabelCtx struct {
|
||||
// pool for labels, which are used during the relabeling.
|
||||
labels []prompb.Label
|
||||
labels []prompbmarshal.Label
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) reset() {
|
||||
@@ -246,7 +146,7 @@ func (rctx *relabelCtx) reset() {
|
||||
}
|
||||
|
||||
var relabelCtxPool = &sync.Pool{
|
||||
New: func() any {
|
||||
New: func() interface{} {
|
||||
return &relabelCtx{}
|
||||
},
|
||||
}
|
||||
@@ -256,18 +156,6 @@ func getRelabelCtx() *relabelCtx {
|
||||
}
|
||||
|
||||
func putRelabelCtx(rctx *relabelCtx) {
|
||||
rctx.reset()
|
||||
rctx.labels = rctx.labels[:0]
|
||||
relabelCtxPool.Put(rctx)
|
||||
}
|
||||
|
||||
func fixPromCompatibleNaming(labels []prompb.Label) {
|
||||
// Replace unsupported Prometheus chars in label names and metric names with underscores.
|
||||
for i := range labels {
|
||||
label := &labels[i]
|
||||
if label.Name == "__name__" {
|
||||
label.Value = promrelabel.SanitizeMetricName(label.Value)
|
||||
} else {
|
||||
label.Name = promrelabel.SanitizeLabelName(label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,22 +4,24 @@ import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestApplyRelabeling(t *testing.T) {
|
||||
f := func(pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
|
||||
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
|
||||
rctx := &relabelCtx{}
|
||||
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
|
||||
gotTss := rctx.applyRelabeling(tss, pcs)
|
||||
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
|
||||
if !reflect.DeepEqual(gotTss, expTss) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, "up", "up")
|
||||
f(nil, nil, "up", "up")
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
@@ -30,40 +32,18 @@ func TestApplyRelabeling(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
f(pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
|
||||
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
|
||||
|
||||
oldVal := *usePromCompatibleNaming
|
||||
*usePromCompatibleNaming = true
|
||||
f(nil, `foo.bar`, `foo_bar`)
|
||||
f(nil, nil, `foo.bar`, `foo_bar`)
|
||||
*usePromCompatibleNaming = oldVal
|
||||
}
|
||||
|
||||
func TestAppendExtraLabels(t *testing.T) {
|
||||
f := func(extraLabels []prompb.Label, sTss, sExpTss string) {
|
||||
t.Helper()
|
||||
rctx := &relabelCtx{}
|
||||
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
|
||||
rctx.appendExtraLabels(tss, extraLabels)
|
||||
if !reflect.DeepEqual(tss, expTss) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, tss)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, "up", "up")
|
||||
f([]prompb.Label{{Name: "foo", Value: "bar"}}, "up", `up{foo="bar"}`)
|
||||
f([]prompb.Label{{Name: "foo", Value: "bar"}}, `up{foo="baz"}`, `up{foo="bar"}`)
|
||||
f([]prompb.Label{{Name: "baz", Value: "qux"}}, `up{foo="baz"}`, `up{foo="baz",baz="qux"}`)
|
||||
|
||||
oldVal := *usePromCompatibleNaming
|
||||
*usePromCompatibleNaming = true
|
||||
f([]prompb.Label{{Name: "foo.bar", Value: "baz"}}, "up", `up{foo.bar="baz"}`)
|
||||
*usePromCompatibleNaming = oldVal
|
||||
}
|
||||
|
||||
func parseSeries(data string) []prompb.TimeSeries {
|
||||
var tss []prompb.TimeSeries
|
||||
tss = append(tss, prompb.TimeSeries{
|
||||
Labels: promutil.MustNewLabelsFromString(data).GetLabels(),
|
||||
func parseSeries(data string) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, prompbmarshal.TimeSeries{
|
||||
Labels: promutils.MustNewLabelsFromString(data).GetLabels(),
|
||||
})
|
||||
return tss
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,349 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
f := func(bucketsCount int) {
|
||||
t.Helper()
|
||||
|
||||
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
|
||||
itemsCount := 1_000 * bucketsCount
|
||||
m := make([]int, bucketsCount)
|
||||
var labels []prompb.Label
|
||||
for i := 0; i < itemsCount; i++ {
|
||||
labels = append(labels[:0], prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: fmt.Sprintf("some_name_%d", i),
|
||||
})
|
||||
for j := 0; j < 10; j++ {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: fmt.Sprintf("label_%d", j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
})
|
||||
}
|
||||
h := getLabelsHash(labels)
|
||||
m[h%uint64(bucketsCount)]++
|
||||
}
|
||||
|
||||
// Verify that the distribution is even
|
||||
expectedItemsPerBucket := itemsCount / bucketsCount
|
||||
for _, n := range m {
|
||||
if math.Abs(1-float64(n)/float64(expectedItemsPerBucket)) > 0.04 {
|
||||
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want around %d", bucketsCount, n, expectedItemsPerBucket)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(2)
|
||||
f(3)
|
||||
f(4)
|
||||
f(5)
|
||||
f(10)
|
||||
}
|
||||
|
||||
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
|
||||
t.Helper()
|
||||
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("cannot load relabel configs: %s", err)
|
||||
}
|
||||
rcs := &relabelConfigs{
|
||||
perURL: []*promrelabel.ParsedConfigs{
|
||||
perURLRelabel,
|
||||
},
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
pss := make([]*pendingSeries, 1)
|
||||
isVMProto := &atomic.Bool{}
|
||||
isVMProto.Store(true)
|
||||
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: 0,
|
||||
streamAggrKeepInput: keepInput,
|
||||
streamAggrDropInput: dropInput,
|
||||
pss: pss,
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
||||
}
|
||||
if dedupInterval > 0 {
|
||||
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
||||
}
|
||||
|
||||
if streamAggrConfig != "" {
|
||||
pushNoop := func(_ []prompb.TimeSeries) {}
|
||||
opts := streamaggr.Options{
|
||||
EnableWindows: enableWindows,
|
||||
}
|
||||
sas, err := streamaggr.LoadFromData([]byte(streamAggrConfig), pushNoop, &opts, "global")
|
||||
if err != nil {
|
||||
t.Fatalf("cannot load streamaggr configs: %s", err)
|
||||
}
|
||||
defer sas.MustStop()
|
||||
rwctx.sas.Store(sas)
|
||||
}
|
||||
|
||||
offsetMsecs := time.Now().UnixMilli()
|
||||
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
||||
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
||||
|
||||
// copy inputTss to make sure it is not mutated during TryPush call
|
||||
copy(expectedTss, inputTss)
|
||||
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
||||
t.Fatalf("cannot push samples to rwctx")
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedTss, inputTss) {
|
||||
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [sum_samples]
|
||||
- interval: 2m
|
||||
outputs: [count_series]
|
||||
`, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, false, 0, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
f(``, ``, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="foo"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="foo"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, false, `
|
||||
metric{env="test"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, true, `
|
||||
metric{env="foo"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="test"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
}
|
||||
|
||||
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
// 1. distribute 100000 series to n nodes.
|
||||
// 2. remove the last node from healthy list.
|
||||
// 3. distribute the same 10000 series to (n-1) node again.
|
||||
// 4. check active time series change rate:
|
||||
// change rate must < (3/total nodes). e.g. +30% if 10 you have 10 nodes.
|
||||
|
||||
f := func(remoteWriteCount int, healthyIdx []int, replicas int) {
|
||||
t.Helper()
|
||||
defer func() {
|
||||
rwctxsGlobal = nil
|
||||
rwctxsGlobalIdx = nil
|
||||
rwctxConsistentHashGlobal = nil
|
||||
}()
|
||||
|
||||
rwctxsGlobal = make([]*remoteWriteCtx, remoteWriteCount)
|
||||
rwctxsGlobalIdx = make([]int, remoteWriteCount)
|
||||
rwctxs := make([]*remoteWriteCtx, 0, len(healthyIdx))
|
||||
|
||||
for i := range remoteWriteCount {
|
||||
rwCtx := &remoteWriteCtx{
|
||||
idx: i,
|
||||
}
|
||||
rwctxsGlobalIdx[i] = i
|
||||
|
||||
if i >= len(healthyIdx) {
|
||||
rwctxsGlobal[i] = rwCtx
|
||||
continue
|
||||
}
|
||||
hIdx := healthyIdx[i]
|
||||
if hIdx != i {
|
||||
rwctxs = append(rwctxs, &remoteWriteCtx{
|
||||
idx: hIdx,
|
||||
})
|
||||
} else {
|
||||
rwctxs = append(rwctxs, rwCtx)
|
||||
}
|
||||
rwctxsGlobal[i] = rwCtx
|
||||
}
|
||||
|
||||
seriesCount := 100000
|
||||
// build 1000000 series
|
||||
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
tssBlock = append(tssBlock, prompb.TimeSeries{
|
||||
Labels: []prompb.Label{
|
||||
{
|
||||
Name: "label",
|
||||
Value: strconv.Itoa(i),
|
||||
},
|
||||
},
|
||||
Samples: []prompb.Sample{
|
||||
{
|
||||
Timestamp: 0,
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// build consistent hash for x remote write context
|
||||
// build active time series set
|
||||
nodes := make([]string, 0, remoteWriteCount)
|
||||
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
||||
for i := 0; i < remoteWriteCount; i++ {
|
||||
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
||||
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
||||
}
|
||||
rwctxConsistentHashGlobal = consistenthash.NewConsistentHash(nodes, 0)
|
||||
|
||||
// create shards
|
||||
x := getTSSShards(len(rwctxs))
|
||||
shards := x.shards
|
||||
|
||||
// execute
|
||||
shardAmountRemoteWriteCtx(tssBlock, shards, rwctxs, replicas)
|
||||
|
||||
for i, nodeIdx := range healthyIdx {
|
||||
for _, ts := range shards[i] {
|
||||
// add it to node[nodeIdx]'s active time series
|
||||
activeTimeSeriesByNodes[nodeIdx][prompb.LabelsToString(ts.Labels)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
totalActiveTimeSeries := 0
|
||||
for _, activeTimeSeries := range activeTimeSeriesByNodes {
|
||||
totalActiveTimeSeries += len(activeTimeSeries)
|
||||
}
|
||||
avgActiveTimeSeries1 := totalActiveTimeSeries / remoteWriteCount
|
||||
putTSSShards(x)
|
||||
|
||||
// removed last node
|
||||
rwctxs = rwctxs[:len(rwctxs)-1]
|
||||
healthyIdx = healthyIdx[:len(healthyIdx)-1]
|
||||
|
||||
x = getTSSShards(len(rwctxs))
|
||||
shards = x.shards
|
||||
|
||||
// execute
|
||||
shardAmountRemoteWriteCtx(tssBlock, shards, rwctxs, replicas)
|
||||
for i, nodeIdx := range healthyIdx {
|
||||
for _, ts := range shards[i] {
|
||||
// add it to node[nodeIdx]'s active time series
|
||||
activeTimeSeriesByNodes[nodeIdx][prompb.LabelsToString(ts.Labels)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
totalActiveTimeSeries = 0
|
||||
for _, activeTimeSeries := range activeTimeSeriesByNodes {
|
||||
totalActiveTimeSeries += len(activeTimeSeries)
|
||||
}
|
||||
avgActiveTimeSeries2 := totalActiveTimeSeries / remoteWriteCount
|
||||
|
||||
changed := math.Abs(float64(avgActiveTimeSeries2-avgActiveTimeSeries1) / float64(avgActiveTimeSeries1))
|
||||
threshold := 3 / float64(remoteWriteCount)
|
||||
|
||||
if changed >= threshold {
|
||||
t.Fatalf("average active time series before: %d, after: %d, changed: %.2f. threshold: %.2f", avgActiveTimeSeries1, avgActiveTimeSeries2, changed, threshold)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
f(5, []int{0, 1, 2, 3, 4}, 1)
|
||||
|
||||
f(5, []int{0, 1, 2, 3, 4}, 2)
|
||||
|
||||
f(10, []int{0, 1, 2, 3, 4, 5, 6, 7, 9}, 1)
|
||||
|
||||
f(10, []int{0, 1, 2, 3, 4, 5, 6, 7, 9}, 3)
|
||||
}
|
||||
|
||||
func TestCalculateHealthyRwctxIdx(t *testing.T) {
|
||||
f := func(total int, healthyIdx []int, unhealthyIdx []int) {
|
||||
t.Helper()
|
||||
|
||||
healthyMap := make(map[int]bool)
|
||||
for _, idx := range healthyIdx {
|
||||
healthyMap[idx] = true
|
||||
}
|
||||
rwctxsGlobal = make([]*remoteWriteCtx, total)
|
||||
rwctxsGlobalIdx = make([]int, total)
|
||||
rwctxs := make([]*remoteWriteCtx, 0, len(healthyIdx))
|
||||
for i := range rwctxsGlobal {
|
||||
rwctx := &remoteWriteCtx{idx: i}
|
||||
rwctxsGlobal[i] = rwctx
|
||||
if healthyMap[i] {
|
||||
rwctxs = append(rwctxs, rwctx)
|
||||
}
|
||||
rwctxsGlobalIdx[i] = i
|
||||
}
|
||||
|
||||
gotHealthyIdx, gotUnhealthyIdx := calculateHealthyRwctxIdx(rwctxs)
|
||||
if !reflect.DeepEqual(healthyIdx, gotHealthyIdx) {
|
||||
t.Errorf("calculateHealthyRwctxIdx want healthyIdx = %v, got %v", healthyIdx, gotHealthyIdx)
|
||||
}
|
||||
if !reflect.DeepEqual(unhealthyIdx, gotUnhealthyIdx) {
|
||||
t.Errorf("calculateHealthyRwctxIdx want unhealthyIdx = %v, got %v", unhealthyIdx, gotUnhealthyIdx)
|
||||
}
|
||||
}
|
||||
|
||||
f(5, []int{0, 1, 2, 3, 4}, nil)
|
||||
f(5, []int{0, 1, 2, 4}, []int{3})
|
||||
f(5, []int{2, 4}, []int{0, 1, 3})
|
||||
f(5, []int{0, 2, 4}, []int{1, 3})
|
||||
f(5, []int{}, []int{0, 1, 2, 3, 4})
|
||||
f(5, []int{4}, []int{0, 1, 2, 3})
|
||||
f(1, []int{0}, nil)
|
||||
f(1, []int{}, []int{0})
|
||||
}
|
||||
92
app/vmagent/remotewrite/statconn.go
Normal file
92
app/vmagent/remotewrite/statconn.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func getStdDialer() *net.Dialer {
|
||||
stdDialerOnce.Do(func() {
|
||||
stdDialer = &net.Dialer{
|
||||
Timeout: 30 * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
DualStack: netutil.TCP6Enabled(),
|
||||
}
|
||||
})
|
||||
return stdDialer
|
||||
}
|
||||
|
||||
var (
|
||||
stdDialer *net.Dialer
|
||||
stdDialerOnce sync.Once
|
||||
)
|
||||
|
||||
func statDial(ctx context.Context, networkUnused, addr string) (conn net.Conn, err error) {
|
||||
network := netutil.GetTCPNetwork()
|
||||
d := getStdDialer()
|
||||
conn, err = d.DialContext(ctx, network, addr)
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
return nil, err
|
||||
}
|
||||
conns.Inc()
|
||||
sc := &statConn{
|
||||
Conn: conn,
|
||||
}
|
||||
return sc, nil
|
||||
}
|
||||
|
||||
var (
|
||||
dialsTotal = metrics.NewCounter(`vmagent_remotewrite_dials_total`)
|
||||
dialErrors = metrics.NewCounter(`vmagent_remotewrite_dial_errors_total`)
|
||||
conns = metrics.NewCounter(`vmagent_remotewrite_conns`)
|
||||
)
|
||||
|
||||
type statConn struct {
|
||||
closed uint64
|
||||
net.Conn
|
||||
}
|
||||
|
||||
func (sc *statConn) Read(p []byte) (int, error) {
|
||||
n, err := sc.Conn.Read(p)
|
||||
connReadsTotal.Inc()
|
||||
if err != nil {
|
||||
connReadErrors.Inc()
|
||||
}
|
||||
connBytesRead.Add(n)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (sc *statConn) Write(p []byte) (int, error) {
|
||||
n, err := sc.Conn.Write(p)
|
||||
connWritesTotal.Inc()
|
||||
if err != nil {
|
||||
connWriteErrors.Inc()
|
||||
}
|
||||
connBytesWritten.Add(n)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (sc *statConn) Close() error {
|
||||
err := sc.Conn.Close()
|
||||
if atomic.AddUint64(&sc.closed, 1) == 1 {
|
||||
conns.Dec()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var (
|
||||
connReadsTotal = metrics.NewCounter(`vmagent_remotewrite_conn_reads_total`)
|
||||
connWritesTotal = metrics.NewCounter(`vmagent_remotewrite_conn_writes_total`)
|
||||
connReadErrors = metrics.NewCounter(`vmagent_remotewrite_conn_read_errors_total`)
|
||||
connWriteErrors = metrics.NewCounter(`vmagent_remotewrite_conn_write_errors_total`)
|
||||
connBytesRead = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_read_total`)
|
||||
connBytesWritten = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_written_total`)
|
||||
)
|
||||
@@ -1,258 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
// Global config
|
||||
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
|
||||
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
||||
"aggregator before optional aggregation with -streamAggr.config . "+
|
||||
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
streamAggrGlobalIgnoreOldSamples = flag.Bool("streamAggr.ignoreOldSamples", false, "Whether to ignore input samples with old timestamps outside the "+
|
||||
"current aggregation interval for aggregator. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples")
|
||||
streamAggrGlobalIgnoreFirstIntervals = flag.Int("streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start for "+
|
||||
"aggregator. Increase this value if you observe incorrect aggregation results after vmagent restarts. It could be caused by receiving unordered delayed data from "+
|
||||
"clients pushing data into the vmagent. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignore-aggregation-intervals-on-start")
|
||||
streamAggrGlobalDropInputLabels = flagutil.NewArrayString("streamAggr.dropInputLabels", "An optional list of labels to drop from samples for aggregator "+
|
||||
"before stream de-duplication and aggregation . See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#dropping-unneeded-labels")
|
||||
streamAggrGlobalEnableWindows = flag.Bool("streamAggr.enableWindows", false, "Enables aggregation within fixed windows for all global aggregators. "+
|
||||
"This allows to get more precise results, but impacts resource usage as it requires twice more memory to store two states. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows.")
|
||||
|
||||
// Per URL config
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
streamAggrIgnoreOldSamples = flagutil.NewArrayBool("remoteWrite.streamAggr.ignoreOldSamples", "Whether to ignore input samples with old timestamps outside the current "+
|
||||
"aggregation interval for the corresponding -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples")
|
||||
streamAggrIgnoreFirstIntervals = flagutil.NewArrayInt("remoteWrite.streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start "+
|
||||
"for the corresponding -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. Increase this value if "+
|
||||
"you observe incorrect aggregation results after vmagent restarts. It could be caused by receiving buffered delayed data from clients pushing data into the vmagent. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignore-aggregation-intervals-on-start")
|
||||
streamAggrDropInputLabels = flagutil.NewArrayString("remoteWrite.streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+
|
||||
"before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. "+
|
||||
"Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#dropping-unneeded-labels")
|
||||
streamAggrEnableWindows = flagutil.NewArrayBool("remoteWrite.streamAggr.enableWindows", "Enables aggregation within fixed windows for all remote write's aggregators. "+
|
||||
"This allows to get more precise results, but impacts resource usage as it requires twice more memory to store two states. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows.")
|
||||
)
|
||||
|
||||
// CheckStreamAggrConfigs checks -remoteWrite.streamAggr.config and -streamAggr.config.
|
||||
func CheckStreamAggrConfigs() error {
|
||||
// Check global config
|
||||
sas, err := newStreamAggrConfigGlobal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sas.MustStop()
|
||||
|
||||
if len(*streamAggrConfig) > len(*remoteWriteURLs) {
|
||||
return fmt.Errorf("too many -remoteWrite.streamAggr.config args: %d; it mustn't exceed the number of -remoteWrite.url args: %d", len(*streamAggrConfig), len(*remoteWriteURLs))
|
||||
}
|
||||
|
||||
pushNoop := func(_ []prompb.TimeSeries) {}
|
||||
for idx := range *streamAggrConfig {
|
||||
sas, err := newStreamAggrConfigPerURL(idx, pushNoop)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sas.MustStop()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func reloadStreamAggrConfigs() {
|
||||
reloadStreamAggrConfigGlobal()
|
||||
for _, rwctx := range rwctxsGlobal {
|
||||
rwctx.reloadStreamAggrConfig()
|
||||
}
|
||||
}
|
||||
|
||||
func reloadStreamAggrConfigGlobal() {
|
||||
path := *streamAggrGlobalConfig
|
||||
if path == "" {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("reloading stream aggregation configs pointed by -streamAggr.config=%q", path)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, path)).Inc()
|
||||
|
||||
sasNew, err := newStreamAggrConfigGlobal()
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, path)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(0)
|
||||
logger.Errorf("cannot reload -streamAggr.config=%q; continue using the previously loaded config; error: %s", path, err)
|
||||
return
|
||||
}
|
||||
|
||||
sas := sasGlobal.Load()
|
||||
if !sasNew.Equal(sas) {
|
||||
sasOld := sasGlobal.Swap(sasNew)
|
||||
sasOld.MustStop()
|
||||
logger.Infof("successfully reloaded -streamAggr.config=%q", path)
|
||||
} else {
|
||||
sasNew.MustStop()
|
||||
logger.Infof("-streamAggr.config=%q wasn't changed since the last reload", path)
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, path)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
func initStreamAggrConfigGlobal() {
|
||||
sas, err := newStreamAggrConfigGlobal()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize global stream aggregators: %s", err)
|
||||
}
|
||||
if sas != nil {
|
||||
filePath := sas.FilePath()
|
||||
sasGlobal.Store(sas)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, filePath)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, filePath)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
dedupInterval := *streamAggrGlobalDedupInterval
|
||||
if dedupInterval > 0 {
|
||||
deduplicatorGlobal = streamaggr.NewDeduplicator(pushTimeSeriesToRemoteStoragesTrackDropped, *streamAggrGlobalEnableWindows, dedupInterval, *streamAggrGlobalDropInputLabels, "dedup-global")
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) initStreamAggrConfig() {
|
||||
idx := rwctx.idx
|
||||
|
||||
sas, err := rwctx.newStreamAggrConfig()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators: %s", err)
|
||||
}
|
||||
if sas != nil {
|
||||
filePath := sas.FilePath()
|
||||
rwctx.sas.Store(sas)
|
||||
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(idx)
|
||||
rwctx.streamAggrDropInput = streamAggrDropInput.GetOptionalArg(idx)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, filePath)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, filePath)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx)
|
||||
if dedupInterval > 0 {
|
||||
alias := fmt.Sprintf("dedup-%d", idx+1)
|
||||
var dropLabels []string
|
||||
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
|
||||
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
|
||||
}
|
||||
rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, *streamAggrGlobalEnableWindows, dedupInterval, dropLabels, alias)
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) reloadStreamAggrConfig() {
|
||||
path := streamAggrConfig.GetOptionalArg(rwctx.idx)
|
||||
if path == "" {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", path)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, path)).Inc()
|
||||
|
||||
sasNew, err := rwctx.newStreamAggrConfig()
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, path)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(0)
|
||||
logger.Errorf("cannot reload -remoteWrite.streamAggr.config=%q; continue using the previously loaded config; error: %s", path, err)
|
||||
return
|
||||
}
|
||||
|
||||
sas := rwctx.sas.Load()
|
||||
if !sasNew.Equal(sas) {
|
||||
sasOld := rwctx.sas.Swap(sasNew)
|
||||
sasOld.MustStop()
|
||||
logger.Infof("successfully reloaded -remoteWrite.streamAggr.config=%q", path)
|
||||
} else {
|
||||
sasNew.MustStop()
|
||||
logger.Infof("-remoteWrite.streamAggr.config=%q wasn't changed since the last reload", path)
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, path)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, path)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
func newStreamAggrConfigGlobal() (*streamaggr.Aggregators, error) {
|
||||
path := *streamAggrGlobalConfig
|
||||
if path == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
opts := &streamaggr.Options{
|
||||
DedupInterval: *streamAggrGlobalDedupInterval,
|
||||
DropInputLabels: *streamAggrGlobalDropInputLabels,
|
||||
IgnoreOldSamples: *streamAggrGlobalIgnoreOldSamples,
|
||||
IgnoreFirstIntervals: *streamAggrGlobalIgnoreFirstIntervals,
|
||||
KeepInput: *streamAggrGlobalKeepInput,
|
||||
EnableWindows: *streamAggrGlobalEnableWindows,
|
||||
}
|
||||
|
||||
sas, err := streamaggr.LoadFromFile(path, pushTimeSeriesToRemoteStoragesTrackDropped, opts, "global")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -streamAggr.config=%q: %w", *streamAggrGlobalConfig, err)
|
||||
}
|
||||
return sas, nil
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) newStreamAggrConfig() (*streamaggr.Aggregators, error) {
|
||||
return newStreamAggrConfigPerURL(rwctx.idx, rwctx.pushInternalTrackDropped)
|
||||
}
|
||||
|
||||
func newStreamAggrConfigPerURL(idx int, pushFunc streamaggr.PushFunc) (*streamaggr.Aggregators, error) {
|
||||
path := streamAggrConfig.GetOptionalArg(idx)
|
||||
if path == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
alias := fmt.Sprintf("%d:secret-url", idx+1)
|
||||
if *showRemoteWriteURL {
|
||||
alias = fmt.Sprintf("%d:%s", idx+1, remoteWriteURLs.GetOptionalArg(idx))
|
||||
}
|
||||
var dropLabels []string
|
||||
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
|
||||
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
|
||||
}
|
||||
opts := &streamaggr.Options{
|
||||
DedupInterval: streamAggrDedupInterval.GetOptionalArg(idx),
|
||||
DropInputLabels: dropLabels,
|
||||
IgnoreOldSamples: streamAggrIgnoreOldSamples.GetOptionalArg(idx),
|
||||
IgnoreFirstIntervals: streamAggrIgnoreFirstIntervals.GetOptionalArg(idx),
|
||||
KeepInput: streamAggrKeepInput.GetOptionalArg(idx),
|
||||
EnableWindows: streamAggrEnableWindows.GetOptionalArg(idx),
|
||||
}
|
||||
|
||||
sas, err := streamaggr.LoadFromFile(path, pushFunc, opts, alias)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.streamAggr.config=%q: %w", path, err)
|
||||
}
|
||||
return sas, nil
|
||||
}
|
||||
@@ -8,10 +8,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -26,17 +25,17 @@ var (
|
||||
//
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, encoding, func(rows []vmimport.Row) error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -50,7 +49,7 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
|
||||
labelsLen := len(labels)
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
@@ -63,12 +62,12 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
|
||||
}
|
||||
samplesLen := len(samples)
|
||||
for j, value := range values {
|
||||
samples = append(samples, prompb.Sample{
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: value,
|
||||
Timestamp: timestamps[j],
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
@@ -76,9 +75,7 @@ func insertRows(at *auth.Token, rows []vmimport.Row, extraLabels []prompb.Label)
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
See vmalert-tool docs [here](https://docs.victoriametrics.com/victoriametrics/vmalert-tool/).
|
||||
|
||||
vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmalert-tool.md).
|
||||
@@ -1,8 +0,0 @@
|
||||
ARG base_image=non-existing
|
||||
FROM $base_image
|
||||
|
||||
EXPOSE 8880
|
||||
|
||||
ENTRYPOINT ["/vmalert-tool-prod"]
|
||||
ARG src_binary=non-existing
|
||||
COPY $src_binary ./vmalert-tool-prod
|
||||
@@ -1,13 +0,0 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image=non-existing
|
||||
ARG root_image=non-existing
|
||||
FROM $certs_image AS certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmalert-tool-prod"]
|
||||
ARG TARGETARCH
|
||||
ARG BINARY_SUFFIX=non-existing
|
||||
COPY vmalert-tool-linux-${TARGETARCH}-prod${BINARY_SUFFIX} ./vmalert-tool-prod
|
||||
@@ -68,13 +68,11 @@ publish-vmalert:
|
||||
|
||||
test-vmalert:
|
||||
go test -v -race -cover ./app/vmalert -loggerLevel=ERROR
|
||||
go test -v -race -cover ./app/vmalert/rule
|
||||
go test -v -race -cover ./app/vmalert/templates
|
||||
go test -v -race -cover ./app/vmalert/datasource
|
||||
go test -v -race -cover ./app/vmalert/notifier
|
||||
go test -v -race -cover ./app/vmalert/config
|
||||
go test -v -race -cover ./app/vmalert/remotewrite
|
||||
go test -v -race -cover ./app/vmalert/vmalertutil
|
||||
|
||||
run-vmalert: vmalert
|
||||
./bin/vmalert -rule=app/vmalert/config/testdata/rules/rules2-good.rules \
|
||||
@@ -101,7 +99,8 @@ replay-vmalert: vmalert
|
||||
-remoteWrite.url=http://localhost:8428 \
|
||||
-external.label=cluster=east-1 \
|
||||
-external.label=replica=a \
|
||||
-replay.timeFrom=2024-06-01T00:00:00Z
|
||||
-replay.timeFrom=2021-05-11T07:21:43Z \
|
||||
-replay.timeTo=2021-05-29T18:40:43Z
|
||||
|
||||
vmalert-linux-amd64:
|
||||
APP_NAME=vmalert CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
@@ -115,12 +114,6 @@ vmalert-linux-arm64:
|
||||
vmalert-linux-ppc64le:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-linux-s390x:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-linux-loong64:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-linux-386:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
695
app/vmalert/alerting.go
Normal file
695
app/vmalert/alerting.go
Normal file
@@ -0,0 +1,695 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
// AlertingRule is basic alert entity
|
||||
type AlertingRule struct {
|
||||
Type config.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
For time.Duration
|
||||
Labels map[string]string
|
||||
Annotations map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
EvalInterval time.Duration
|
||||
Debug bool
|
||||
|
||||
q datasource.Querier
|
||||
|
||||
alertsMu sync.RWMutex
|
||||
// stores list of active alerts
|
||||
alerts map[uint64]*notifier.Alert
|
||||
|
||||
// state stores recent state changes
|
||||
// during evaluations
|
||||
state *ruleState
|
||||
|
||||
metrics *alertingRuleMetrics
|
||||
}
|
||||
|
||||
type alertingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
}
|
||||
|
||||
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
||||
ar := &AlertingRule{
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Alert,
|
||||
Expr: cfg.Expr,
|
||||
For: cfg.For.Duration(),
|
||||
Labels: cfg.Labels,
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
EvalInterval: group.Interval,
|
||||
Debug: cfg.Debug,
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
QueryParams: group.Params,
|
||||
Headers: group.Headers,
|
||||
Debug: cfg.Debug,
|
||||
}),
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StatePending {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.active = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_firing{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateFiring {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
return float64(e.samples)
|
||||
})
|
||||
return ar
|
||||
}
|
||||
|
||||
// Close unregisters rule metrics
|
||||
func (ar *AlertingRule) Close() {
|
||||
ar.metrics.active.Unregister()
|
||||
ar.metrics.pending.Unregister()
|
||||
ar.metrics.errors.Unregister()
|
||||
ar.metrics.samples.Unregister()
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (ar *AlertingRule) String() string {
|
||||
return ar.Name
|
||||
}
|
||||
|
||||
// ID returns unique Rule ID
|
||||
// within the parent Group.
|
||||
func (ar *AlertingRule) ID() uint64 {
|
||||
return ar.RuleID
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
|
||||
if !ar.Debug {
|
||||
return
|
||||
}
|
||||
prefix := fmt.Sprintf("DEBUG rule %q:%q (%d) at %v: ",
|
||||
ar.GroupName, ar.Name, ar.RuleID, at.Format(time.RFC3339))
|
||||
|
||||
if a != nil {
|
||||
labelKeys := make([]string, len(a.Labels))
|
||||
var i int
|
||||
for k := range a.Labels {
|
||||
labelKeys[i] = k
|
||||
i++
|
||||
}
|
||||
sort.Strings(labelKeys)
|
||||
labels := make([]string, len(labelKeys))
|
||||
for i, l := range labelKeys {
|
||||
labels[i] = fmt.Sprintf("%s=%q", l, a.Labels[l])
|
||||
}
|
||||
labelsStr := strings.Join(labels, ",")
|
||||
prefix += fmt.Sprintf("alert %d {%s} ", a.ID, labelsStr)
|
||||
}
|
||||
msg := fmt.Sprintf(format, args...)
|
||||
logger.Infof("%s", prefix+msg)
|
||||
}
|
||||
|
||||
type labelSet struct {
|
||||
// origin labels extracted from received time series
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, origin labels from time series preferred.
|
||||
// used for templating annotations
|
||||
origin map[string]string
|
||||
// processed labels includes origin labels
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, extra labels are preferred.
|
||||
// used as labels attached to notifier.Alert and ALERTS series written to remote storage.
|
||||
processed map[string]string
|
||||
}
|
||||
|
||||
// toLabels converts labels from given Metric
|
||||
// to labelSet which contains original and processed labels.
|
||||
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
ls := &labelSet{
|
||||
origin: make(map[string]string),
|
||||
processed: make(map[string]string),
|
||||
}
|
||||
for _, l := range m.Labels {
|
||||
ls.origin[l.Name] = l.Value
|
||||
// drop __name__ to be consistent with Prometheus alerting
|
||||
if l.Name == "__name__" {
|
||||
continue
|
||||
}
|
||||
ls.processed[l.Name] = l.Value
|
||||
}
|
||||
|
||||
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
||||
Labels: ls.origin,
|
||||
Value: m.Values[0],
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.processed[k] = v
|
||||
if _, ok := ls.origin[k]; !ok {
|
||||
ls.origin[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// set additional labels to identify group and rule name
|
||||
if ar.Name != "" {
|
||||
ls.processed[alertNameLabel] = ar.Name
|
||||
if _, ok := ls.origin[alertNameLabel]; !ok {
|
||||
ls.origin[alertNameLabel] = ar.Name
|
||||
}
|
||||
}
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.processed[alertGroupNameLabel] = ar.GroupName
|
||||
if _, ok := ls.origin[alertGroupNameLabel]; !ok {
|
||||
ls.origin[alertGroupNameLabel] = ar.GroupName
|
||||
}
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// ExecRange executes alerting rule on the given time range similarly to Exec.
|
||||
// It doesn't update internal states of the Rule and meant to be used just
|
||||
// to get time series for backfilling.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
series, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result []prompbmarshal.TimeSeries
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
|
||||
}
|
||||
for _, s := range series {
|
||||
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
}
|
||||
if ar.For == 0 { // if alert is instant
|
||||
a.State = notifier.StateFiring
|
||||
for i := range s.Values {
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// if alert with For > 0
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
at := time.Unix(s.Timestamps[i], 0)
|
||||
if at.Sub(prevT) > ar.EvalInterval {
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For {
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = at
|
||||
}
|
||||
prevT = at
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// resolvedRetention is the duration for which a resolved alert instance
|
||||
// is kept in memory state and consequently repeatedly sent to the AlertManager.
|
||||
const resolvedRetention = 15 * time.Minute
|
||||
|
||||
// Exec executes AlertingRule expression via the given Querier.
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
qMetrics, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(qMetrics),
|
||||
err: err,
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
}()
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
defer ar.alertsMu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
|
||||
|
||||
for h, a := range ar.alerts {
|
||||
// cleanup inactive alerts from previous Exec
|
||||
if a.State == notifier.StateInactive && ts.Sub(a.ResolvedAt) > resolvedRetention {
|
||||
ar.logDebugf(ts, a, "deleted as inactive")
|
||||
delete(ar.alerts, h)
|
||||
}
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
}
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, curState.err
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.err
|
||||
}
|
||||
updated[h] = struct{}{}
|
||||
if a, ok := ar.alerts[h]; ok {
|
||||
if a.State == notifier.StateInactive {
|
||||
// alert could be in inactive state for resolvedRetention
|
||||
// so when we again receive metrics for it - we switch it
|
||||
// back to notifier.StatePending
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = ts
|
||||
ar.logDebugf(ts, a, "INACTIVE => PENDING")
|
||||
}
|
||||
a.Value = m.Values[0]
|
||||
// re-exec template since Value or query can be used in annotations
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, start, qFn)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, curState.err
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = ts
|
||||
ar.alerts[h] = a
|
||||
ar.logDebugf(ts, a, "created in state PENDING")
|
||||
}
|
||||
var numActivePending int
|
||||
for h, a := range ar.alerts {
|
||||
// if alert wasn't updated in this iteration
|
||||
// means it is resolved already
|
||||
if _, ok := updated[h]; !ok {
|
||||
if a.State == notifier.StatePending {
|
||||
// alert was in Pending state - it is not
|
||||
// active anymore
|
||||
delete(ar.alerts, h)
|
||||
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
|
||||
continue
|
||||
}
|
||||
if a.State == notifier.StateFiring {
|
||||
a.State = notifier.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
||||
}
|
||||
continue
|
||||
}
|
||||
numActivePending++
|
||||
if a.State == notifier.StatePending && ts.Sub(a.ActiveAt) >= ar.For {
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = ts
|
||||
alertsFired.Inc()
|
||||
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
|
||||
}
|
||||
}
|
||||
if limit > 0 && numActivePending > limit {
|
||||
ar.alerts = map[uint64]*notifier.Alert{}
|
||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
return nil, curState.err
|
||||
}
|
||||
return ar.toTimeSeries(ts.Unix()), nil
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateInactive {
|
||||
continue
|
||||
}
|
||||
ts := ar.alertToTimeSeries(a, timestamp)
|
||||
tss = append(tss, ts...)
|
||||
}
|
||||
return tss
|
||||
}
|
||||
|
||||
// UpdateWith copies all significant fields.
|
||||
// alerts state isn't copied since
|
||||
// it should be updated in next 2 Execs
|
||||
func (ar *AlertingRule) UpdateWith(r Rule) error {
|
||||
nr, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
|
||||
}
|
||||
ar.Expr = nr.Expr
|
||||
ar.For = nr.For
|
||||
ar.Labels = nr.Labels
|
||||
ar.Annotations = nr.Annotations
|
||||
ar.EvalInterval = nr.EvalInterval
|
||||
ar.Debug = nr.Debug
|
||||
ar.q = nr.q
|
||||
ar.state = nr.state
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: consider hashing algorithm in VM
|
||||
func hash(labels map[string]string) uint64 {
|
||||
hash := fnv.New64a()
|
||||
keys := make([]string, 0, len(labels))
|
||||
for k := range labels {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
// drop __name__ to be consistent with Prometheus alerting
|
||||
if k == "__name__" {
|
||||
continue
|
||||
}
|
||||
name, value := k, labels[k]
|
||||
hash.Write([]byte(name))
|
||||
hash.Write([]byte(value))
|
||||
hash.Write([]byte("\xff"))
|
||||
}
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.Time, qFn templates.QueryFn) (*notifier.Alert, error) {
|
||||
var err error
|
||||
if ls == nil {
|
||||
ls, err = ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
}
|
||||
a := ¬ifier.Alert{
|
||||
GroupID: ar.GroupID,
|
||||
Name: ar.Name,
|
||||
Labels: ls.processed,
|
||||
Value: m.Values[0],
|
||||
ActiveAt: start,
|
||||
Expr: ar.Expr,
|
||||
For: ar.For,
|
||||
}
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
return a, err
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return ar.newAlertAPI(*a)
|
||||
}
|
||||
|
||||
// ToAPI returns Rule representation in form of APIRule
|
||||
// Isn't thread-safe. Call must be protected by AlertingRule mutex.
|
||||
func (ar *AlertingRule) ToAPI() APIRule {
|
||||
lastState := ar.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "alerting",
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
MaxUpdates: ar.state.size(),
|
||||
Updates: ar.state.getAll(),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
}
|
||||
if lastState.err != nil {
|
||||
r.LastError = lastState.err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
// satisfy APIRule.State logic
|
||||
if len(r.Alerts) > 0 {
|
||||
r.State = notifier.StatePending.String()
|
||||
stateFiring := notifier.StateFiring.String()
|
||||
for _, a := range r.Alerts {
|
||||
if a.State == stateFiring {
|
||||
r.State = stateFiring
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// AlertsToAPI generates list of APIAlert objects from existing alerts
|
||||
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
|
||||
var alerts []*APIAlert
|
||||
ar.alertsMu.RLock()
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateInactive {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, ar.newAlertAPI(*a))
|
||||
}
|
||||
ar.alertsMu.RUnlock()
|
||||
return alerts
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
|
||||
aa := &APIAlert{
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", a.ID),
|
||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||
RuleID: fmt.Sprintf("%d", ar.RuleID),
|
||||
|
||||
Name: a.Name,
|
||||
Expression: ar.Expr,
|
||||
Labels: a.Labels,
|
||||
Annotations: a.Annotations,
|
||||
State: a.State.String(),
|
||||
ActiveAt: a.ActiveAt,
|
||||
Restored: a.Restored,
|
||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||
}
|
||||
if alertURLGeneratorFn != nil {
|
||||
aa.SourceLink = alertURLGeneratorFn(a)
|
||||
}
|
||||
return aa
|
||||
}
|
||||
|
||||
const (
|
||||
// alertMetricName is the metric name for synthetic alert timeseries.
|
||||
alertMetricName = "ALERTS"
|
||||
// alertForStateMetricName is the metric name for 'for' state of alert.
|
||||
alertForStateMetricName = "ALERTS_FOR_STATE"
|
||||
|
||||
// alertNameLabel is the label name indicating the name of an alert.
|
||||
alertNameLabel = "alertname"
|
||||
// alertStateLabel is the label name indicating the state of an alert.
|
||||
alertStateLabel = "alertstate"
|
||||
|
||||
// alertGroupNameLabel defines the label name attached for generated time series.
|
||||
// attaching this label may be disabled via `-disableAlertgroupLabel` flag.
|
||||
alertGroupNameLabel = "alertgroup"
|
||||
)
|
||||
|
||||
// alertToTimeSeries converts the given alert with the given timestamp to time series
|
||||
func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp int64) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, alertToTimeSeries(a, timestamp))
|
||||
if ar.For > 0 {
|
||||
tss = append(tss, alertForToTimeSeries(a, timestamp))
|
||||
}
|
||||
return tss
|
||||
}
|
||||
|
||||
func alertToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
|
||||
labels := make(map[string]string)
|
||||
for k, v := range a.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
labels["__name__"] = alertMetricName
|
||||
labels[alertStateLabel] = a.State.String()
|
||||
return newTimeSeries([]float64{1}, []int64{timestamp}, labels)
|
||||
}
|
||||
|
||||
// alertForToTimeSeries returns a timeseries that represents
|
||||
// state of active alerts, where value is time when alert become active
|
||||
func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
|
||||
labels := make(map[string]string)
|
||||
for k, v := range a.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
labels["__name__"] = alertForStateMetricName
|
||||
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
||||
}
|
||||
|
||||
// Restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
defer ar.alertsMu.Unlock()
|
||||
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, a := range ar.alerts {
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
qMetrics, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.Restored = true
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// alertsToSend walks through the current alerts of AlertingRule
|
||||
// and returns only those which should be sent to notifier.
|
||||
// Isn't concurrent safe.
|
||||
func (ar *AlertingRule) alertsToSend(ts time.Time, resolveDuration, resendDelay time.Duration) []notifier.Alert {
|
||||
needsSending := func(a *notifier.Alert) bool {
|
||||
if a.State == notifier.StatePending {
|
||||
return false
|
||||
}
|
||||
if a.ResolvedAt.After(a.LastSent) {
|
||||
return true
|
||||
}
|
||||
return a.LastSent.Add(resendDelay).Before(ts)
|
||||
}
|
||||
|
||||
var alerts []notifier.Alert
|
||||
for _, a := range ar.alerts {
|
||||
if !needsSending(a) {
|
||||
continue
|
||||
}
|
||||
a.End = ts.Add(resolveDuration)
|
||||
if a.State == notifier.StateInactive {
|
||||
a.End = a.ResolvedAt
|
||||
}
|
||||
a.LastSent = ts
|
||||
alerts = append(alerts, *a)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
980
app/vmalert/alerting_test.go
Normal file
980
app/vmalert/alerting_test.go
Normal file
@@ -0,0 +1,980 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
||||
timestamp := time.Now()
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
alert *notifier.Alert
|
||||
expTS []prompbmarshal.TimeSeries
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("instant", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("instant extra labels", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("instant labels override", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
alertStateLabel: "foo",
|
||||
"__name__": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for", time.Second),
|
||||
¬ifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second)},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
}),
|
||||
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
||||
[]int64{timestamp.UnixNano()},
|
||||
map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for pending", 10*time.Second),
|
||||
¬ifier.Alert{State: notifier.StatePending, ActiveAt: timestamp.Add(time.Second)},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StatePending.String(),
|
||||
}),
|
||||
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
||||
[]int64{timestamp.UnixNano()},
|
||||
map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
}),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
tc.rule.alerts[tc.alert.ID] = tc.alert
|
||||
tss := tc.rule.toTimeSeries(timestamp.Unix())
|
||||
if err := compareTimeSeries(t, tc.expTS, tss); err != nil {
|
||||
t.Fatalf("timeseries missmatch: %s", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec(t *testing.T) {
|
||||
const defaultStep = 5 * time.Millisecond
|
||||
type testAlert struct {
|
||||
labels []string
|
||||
alert *notifier.Alert
|
||||
}
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
steps [][]datasource.Metric
|
||||
expAlerts []testAlert
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("empty", 0),
|
||||
[][]datasource.Metric{},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("empty labels", 0),
|
||||
[][]datasource.Metric{
|
||||
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
|
||||
},
|
||||
[]testAlert{
|
||||
{alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing=>inactive", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing=>inactive=>firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing=>inactive=>firing=>inactive", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{},
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multiple-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{
|
||||
metricWithLabels(t, "name", "foo"),
|
||||
metricWithLabels(t, "name", "foo1"),
|
||||
metricWithLabels(t, "name", "foo2"),
|
||||
},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
{labels: []string{"name", "foo1"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
{labels: []string{"name", "foo2"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multiple-steps-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo1")},
|
||||
{metricWithLabels(t, "name", "foo2")},
|
||||
},
|
||||
// 1: fire first alert
|
||||
// 2: fire second alert, set first inactive
|
||||
// 3: fire third alert, set second inactive
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
{labels: []string{"name", "foo1"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
{labels: []string{"name", "foo2"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Minute),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-fired", defaultStep),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending=>empty", time.Second),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
// empty step to reset and delete pending alerts
|
||||
{},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending=>firing=>inactive", defaultStep),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending=>firing=>inactive=>pending", defaultStep),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending=>firing=>inactive=>pending=>firing", defaultStep),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
},
|
||||
[]testAlert{
|
||||
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
||||
},
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
for _, step := range tc.steps {
|
||||
fq.reset()
|
||||
fq.add(step...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
// artificial delay between applying steps
|
||||
time.Sleep(defaultStep)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
||||
}
|
||||
expAlerts := make(map[uint64]*notifier.Alert)
|
||||
for _, ta := range tc.expAlerts {
|
||||
labels := make(map[string]string)
|
||||
for i := 0; i < len(ta.labels); i += 2 {
|
||||
k, v := ta.labels[i], ta.labels[i+1]
|
||||
labels[k] = v
|
||||
}
|
||||
labels[alertNameLabel] = tc.rule.Name
|
||||
h := hash(labels)
|
||||
expAlerts[h] = ta.alert
|
||||
}
|
||||
for key, exp := range expAlerts {
|
||||
got, ok := tc.rule.alerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != exp.State {
|
||||
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("empty", 0),
|
||||
[]datasource.Metric{},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("empty labels", 0),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1}, Timestamps: []int64{1}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing", 0),
|
||||
[]datasource.Metric{
|
||||
metricWithLabels(t, "name", "foo"),
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{
|
||||
Labels: map[string]string{"name": "foo"},
|
||||
State: notifier.StateFiring,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing-on-range", 0),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1e3, 2e3, 3e3}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-firing", 3*time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1, 1, 1}, Timestamps: []int64{1, 2, 5, 6, 20}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
||||
},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
//
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
}},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
||||
},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
//
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.add(tc.data...)
|
||||
gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
var expTS []prompbmarshal.TimeSeries
|
||||
var j int
|
||||
for _, series := range tc.data {
|
||||
for _, timestamp := range series.Timestamps {
|
||||
a := tc.expAlerts[j]
|
||||
if a.Labels == nil {
|
||||
a.Labels = make(map[string]string)
|
||||
}
|
||||
a.Labels[alertNameLabel] = tc.rule.Name
|
||||
expTS = append(expTS, tc.rule.alertToTimeSeries(a, timestamp)...)
|
||||
j++
|
||||
}
|
||||
}
|
||||
if len(gotTS) != len(expTS) {
|
||||
t.Fatalf("expected %d time series; got %d", len(expTS), len(gotTS))
|
||||
}
|
||||
for i := range expTS {
|
||||
got, exp := gotTS[i], expTS[i]
|
||||
if !reflect.DeepEqual(got, exp) {
|
||||
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroup_Restore(t *testing.T) {
|
||||
defaultTS := time.Now()
|
||||
fqr := &fakeQuerierWithRegistry{}
|
||||
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
defer fqr.reset()
|
||||
|
||||
for _, r := range rules {
|
||||
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
}
|
||||
|
||||
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
|
||||
fg.start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
fg.close()
|
||||
wg.Wait()
|
||||
|
||||
gotAlerts := make(map[uint64]*notifier.Alert)
|
||||
for _, rs := range fg.Rules {
|
||||
alerts := rs.(*AlertingRule).alerts
|
||||
for k, v := range alerts {
|
||||
if !v.Restored {
|
||||
// set not restored alerts to predictable timestamp
|
||||
v.ActiveAt = defaultTS
|
||||
}
|
||||
gotAlerts[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if len(gotAlerts) != len(expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(expAlerts), len(gotAlerts))
|
||||
}
|
||||
for key, exp := range expAlerts {
|
||||
got, ok := gotAlerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != notifier.StatePending {
|
||||
t.Fatalf("expected state %d; got %d", notifier.StatePending, got.State)
|
||||
}
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stateMetric := func(name string, value time.Time, labels ...string) datasource.Metric {
|
||||
labels = append(labels, "__name__", alertForStateMetricName)
|
||||
labels = append(labels, alertNameLabel, name)
|
||||
labels = append(labels, alertGroupNameLabel, "TestRestore")
|
||||
return metricWithValueAndLabels(t, float64(value.Unix()), labels...)
|
||||
}
|
||||
|
||||
// one active alert, no previous state
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
fqr.reset()
|
||||
|
||||
// one active alert with state restore
|
||||
ts := time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, two with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// one active alert but wrong state restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
stateMetric("wrong alert", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with labels
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with restore labels missmatch
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
ar := newTestAlertingRule("test", 0)
|
||||
ar.Labels = map[string]string{"job": "test"}
|
||||
ar.q = fq
|
||||
|
||||
// successful attempt
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
_, err := ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// label `job` will collide with rule extra label and will make both time series equal
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if !errors.Is(err, errDuplicate) {
|
||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||
}
|
||||
|
||||
fq.reset()
|
||||
|
||||
expErr := "connection reset by peer"
|
||||
fq.setErr(errors.New(expErr))
|
||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get err; got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRuleLimit(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
ar := newTestAlertingRule("test", 0)
|
||||
ar.Labels = map[string]string{"job": "test"}
|
||||
ar.q = fq
|
||||
ar.For = time.Minute
|
||||
testCases := []struct {
|
||||
limit int
|
||||
err string
|
||||
tssNum int
|
||||
}{
|
||||
{
|
||||
limit: 0,
|
||||
tssNum: 4,
|
||||
},
|
||||
{
|
||||
limit: -1,
|
||||
tssNum: 4,
|
||||
},
|
||||
{
|
||||
limit: 1,
|
||||
err: "exec exceeded limit of 1 with 2 alerts",
|
||||
tssNum: 0,
|
||||
},
|
||||
{
|
||||
limit: 4,
|
||||
tssNum: 4,
|
||||
},
|
||||
}
|
||||
var (
|
||||
err error
|
||||
timestamp = time.Now()
|
||||
)
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
||||
for _, testCase := range testCases {
|
||||
_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
|
||||
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
fq.reset()
|
||||
}
|
||||
|
||||
func TestAlertingRule_Template(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
metrics []datasource.Metric
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
&AlertingRule{
|
||||
Name: "common",
|
||||
Labels: map[string]string{
|
||||
"region": "east",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "foo"`,
|
||||
},
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "common",
|
||||
"region": "east",
|
||||
"instance": "foo",
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "bar"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "bar"`,
|
||||
},
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "common",
|
||||
"region": "east",
|
||||
"instance": "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
&AlertingRule{
|
||||
Name: "override label",
|
||||
Labels: map[string]string{
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "bar"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
"description": `override: It is 10 connections for "bar"`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
&AlertingRule{
|
||||
Name: "OriginLabels",
|
||||
GroupName: "Testing",
|
||||
Labels: map[string]string{
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1,
|
||||
alertNameLabel, "originAlertname",
|
||||
alertGroupNameLabel, "originGroupname",
|
||||
"instance", "foo"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
for hash, expAlert := range tc.expAlerts {
|
||||
gotAlert := tc.rule.alerts[hash]
|
||||
if gotAlert == nil {
|
||||
t.Fatalf("alert %d is missing; labels: %v; annotations: %v",
|
||||
hash, expAlert.Labels, expAlert.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
|
||||
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(expAlert.Labels, gotAlert.Labels) {
|
||||
t.Fatalf("expected to have labels %#v; got %#v", expAlert.Labels, gotAlert.Labels)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertsToSend(t *testing.T) {
|
||||
ts := time.Now()
|
||||
f := func(alerts, expAlerts []*notifier.Alert, resolveDuration, resendDelay time.Duration) {
|
||||
t.Helper()
|
||||
ar := &AlertingRule{alerts: make(map[uint64]*notifier.Alert)}
|
||||
for i, a := range alerts {
|
||||
ar.alerts[uint64(i)] = a
|
||||
}
|
||||
gotAlerts := ar.alertsToSend(ts, resolveDuration, resendDelay)
|
||||
if gotAlerts == nil && expAlerts == nil {
|
||||
return
|
||||
}
|
||||
if len(gotAlerts) != len(expAlerts) {
|
||||
t.Fatalf("expected to get %d alerts; got %d instead",
|
||||
len(expAlerts), len(gotAlerts))
|
||||
}
|
||||
sort.Slice(expAlerts, func(i, j int) bool {
|
||||
return expAlerts[i].Name < expAlerts[j].Name
|
||||
})
|
||||
sort.Slice(gotAlerts, func(i, j int) bool {
|
||||
return gotAlerts[i].Name < gotAlerts[j].Name
|
||||
})
|
||||
for i, exp := range expAlerts {
|
||||
got := gotAlerts[i]
|
||||
if got.LastSent != exp.LastSent {
|
||||
t.Fatalf("expected LastSent to be %v; got %v", exp.LastSent, got.LastSent)
|
||||
}
|
||||
if got.End != exp.End {
|
||||
t.Fatalf("expected End to be %v; got %v", exp.End, got.End)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f( // send firing alert with custom resolve time
|
||||
[]*notifier.Alert{{State: notifier.StateFiring}},
|
||||
[]*notifier.Alert{{LastSent: ts, End: ts.Add(5 * time.Minute)}},
|
||||
5*time.Minute, time.Minute,
|
||||
)
|
||||
f( // resolve inactive alert at the current timestamp
|
||||
[]*notifier.Alert{{State: notifier.StateInactive, ResolvedAt: ts}},
|
||||
[]*notifier.Alert{{LastSent: ts, End: ts}},
|
||||
time.Minute, time.Minute,
|
||||
)
|
||||
f( // mixed case of firing and resolved alerts. Names are added for deterministic sorting
|
||||
[]*notifier.Alert{{Name: "a", State: notifier.StateFiring}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
|
||||
[]*notifier.Alert{{Name: "a", LastSent: ts, End: ts.Add(5 * time.Minute)}, {Name: "b", LastSent: ts, End: ts}},
|
||||
5*time.Minute, time.Minute,
|
||||
)
|
||||
f( // mixed case of pending and resolved alerts. Names are added for deterministic sorting
|
||||
[]*notifier.Alert{{Name: "a", State: notifier.StatePending}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
|
||||
[]*notifier.Alert{{Name: "b", LastSent: ts, End: ts}},
|
||||
5*time.Minute, time.Minute,
|
||||
)
|
||||
f( // attempt to send alert that was already sent in the resendDelay interval
|
||||
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
|
||||
nil,
|
||||
time.Minute, time.Minute,
|
||||
)
|
||||
f( // attempt to send alert that was sent out of the resendDelay interval
|
||||
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-2 * time.Minute)}},
|
||||
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
|
||||
time.Minute, time.Minute,
|
||||
)
|
||||
f( // alert must be sent even if resendDelay interval is 0
|
||||
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
|
||||
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
|
||||
time.Minute, 0,
|
||||
)
|
||||
f( // inactive alert which has been sent already
|
||||
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts.Add(-2 * time.Second)}},
|
||||
nil,
|
||||
time.Minute, time.Minute,
|
||||
)
|
||||
f( // inactive alert which has been resolved after last send
|
||||
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts}},
|
||||
[]*notifier.Alert{{LastSent: ts, End: ts}},
|
||||
time.Minute, time.Minute,
|
||||
)
|
||||
}
|
||||
|
||||
func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
|
||||
r := newTestAlertingRule(name, 0)
|
||||
r.Labels = make(map[string]string)
|
||||
for i := 0; i < len(labels); i += 2 {
|
||||
r.Labels[labels[i]] = labels[i+1]
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
return &AlertingRule{
|
||||
Name: name,
|
||||
For: waitFor,
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
}
|
||||
@@ -1,39 +1,33 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/log"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
var defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/victoriametrics/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
|
||||
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutil.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutil.Duration `yaml:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
// Labels is a set of label value pairs, that will be added to every rule.
|
||||
// It has priority over the external labels.
|
||||
Labels map[string]string `yaml:"labels"`
|
||||
@@ -44,18 +38,13 @@ type Group struct {
|
||||
Params url.Values `yaml:"params"`
|
||||
// Headers contains optional HTTP headers added to each rule request
|
||||
Headers []Header `yaml:"headers,omitempty"`
|
||||
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
|
||||
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
|
||||
// EvalAlignment will make the timestamp of group query requests be aligned with interval
|
||||
EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
|
||||
// Debug enables debug logs for the group
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]any `yaml:",inline"`
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type group Group
|
||||
if err := unmarshal((*group)(g)); err != nil {
|
||||
return err
|
||||
@@ -64,39 +53,22 @@ func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
|
||||
}
|
||||
// change default value to prometheus datasource.
|
||||
if g.Type.Get() == "" {
|
||||
g.Type = NewRawType(*defaultRuleType)
|
||||
g.Type.Set(NewPrometheusType())
|
||||
}
|
||||
h := fnv.New64a()
|
||||
|
||||
h := md5.New()
|
||||
h.Write(b)
|
||||
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate checks configuration errors for group and internal rules
|
||||
// Validate check for internal Group or Rule configuration errors
|
||||
func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool) error {
|
||||
if g.Name == "" {
|
||||
return fmt.Errorf("group name must be set")
|
||||
}
|
||||
if g.Interval.Duration() < 0 {
|
||||
return fmt.Errorf("interval shouldn't be lower than 0")
|
||||
}
|
||||
if g.EvalOffset.Duration() < 0 {
|
||||
return fmt.Errorf("eval_offset shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
|
||||
if g.EvalOffset.Duration() > g.Interval.Duration() {
|
||||
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
}
|
||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||
}
|
||||
if g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||
}
|
||||
if g.Concurrency < 0 {
|
||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||
}
|
||||
|
||||
uniqueRules := map[uint64]struct{}{}
|
||||
for _, r := range g.Rules {
|
||||
@@ -105,26 +77,26 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
ruleName = r.Alert
|
||||
}
|
||||
if _, ok := uniqueRules[r.ID]; ok {
|
||||
return fmt.Errorf("%q is a duplicate in group", r.String())
|
||||
return fmt.Errorf("%q is a duplicate within the group %q", r.String(), g.Name)
|
||||
}
|
||||
uniqueRules[r.ID] = struct{}{}
|
||||
if err := r.Validate(); err != nil {
|
||||
return fmt.Errorf("invalid rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if validateExpressions {
|
||||
// its needed only for tests.
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateTplFn != nil {
|
||||
if err := validateTplFn(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if err := validateTplFn(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -134,26 +106,24 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
// Rule describes entity that represent either
|
||||
// recording rule or alerting rule.
|
||||
type Rule struct {
|
||||
ID uint64
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
For *promutil.Duration `yaml:"for,omitempty"`
|
||||
// Alert will continue firing for this long even when the alerting expression no longer has results.
|
||||
KeepFiringFor *promutil.Duration `yaml:"keep_firing_for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Debug *bool `yaml:"debug,omitempty"`
|
||||
ID uint64
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
For *promutils.Duration `yaml:"for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
|
||||
// Overrides `-rule.updateEntriesLimit`.
|
||||
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]any `yaml:",inline"`
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (r *Rule) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type rule Rule
|
||||
if err := unmarshal((*rule)(r)); err != nil {
|
||||
return err
|
||||
@@ -231,45 +201,21 @@ func (r *Rule) Validate() error {
|
||||
// ValidateTplFn must validate the given annotations
|
||||
type ValidateTplFn func(annotations map[string]string) error
|
||||
|
||||
// cLogger is a logger with support of logs suppressing.
|
||||
// it is used when logs emitted by config package needs
|
||||
// to be suppressed.
|
||||
var cLogger = &log.Logger{}
|
||||
|
||||
// ParseSilent parses rule configs from given file patterns without emitting logs
|
||||
func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
cLogger.Suppress(true)
|
||||
defer cLogger.Suppress(false)
|
||||
|
||||
files, err := ReadFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
}
|
||||
return parse(files, validateTplFn, validateExpressions)
|
||||
}
|
||||
|
||||
// Parse parses rule configs from given file patterns
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
files, err := ReadFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
var fp []string
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
}
|
||||
groups, err := parse(files, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
}
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
errGroup := new(vmalertutil.ErrGroup)
|
||||
errGroup := new(utils.ErrGroup)
|
||||
var groups []Group
|
||||
for file, data := range files {
|
||||
for _, file := range fp {
|
||||
uniqueGroups := map[string]struct{}{}
|
||||
gr, err := parseConfig(data)
|
||||
gr, err := parseFile(file)
|
||||
if err != nil {
|
||||
errGroup.Add(fmt.Errorf("failed to parse file %q: %w", file, err))
|
||||
continue
|
||||
@@ -291,38 +237,34 @@ func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpress
|
||||
if err := errGroup.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
logger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
}
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parseConfig(data []byte) ([]Group, error) {
|
||||
data = envtemplate.ReplaceBytes(data)
|
||||
|
||||
var result []Group
|
||||
type cfgFile struct {
|
||||
func parseFile(path string) ([]Group, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file %q: %w", path, err)
|
||||
}
|
||||
data, err = envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars in %q: %w", path, err)
|
||||
}
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]any `yaml:",inline"`
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}{}
|
||||
err = yaml.Unmarshal(data, &g)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
decoder := yaml.NewDecoder(bytes.NewReader(data))
|
||||
for {
|
||||
var cf cfgFile
|
||||
if err := decoder.Decode(&cf); err != nil {
|
||||
if err == io.EOF { // EOF indicates no more documents to read
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if err := checkOverflow(cf.XXX, "config"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = append(result, cf.Groups...)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
return g.Groups, checkOverflow(g.XXX, "config")
|
||||
}
|
||||
|
||||
func checkOverflow(m map[string]any, ctx string) error {
|
||||
func checkOverflow(m map[string]interface{}, ctx string) error {
|
||||
if len(m) > 0 {
|
||||
var keys []string
|
||||
for k := range m {
|
||||
|
||||
@@ -1,495 +1,407 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"gopkg.in/yaml.v2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
if err := templates.Load([]string{"testdata/templates/*good.tmpl"}, url.URL{}); err != nil {
|
||||
if err := templates.Load([]string{"testdata/templates/*good.tmpl"}, true); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestParseFromURL(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/bad", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte("foo bar"))
|
||||
})
|
||||
mux.HandleFunc("/good-alert", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
groups:
|
||||
- name: TestGroup
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: vm_tcplistener_conns > 0`))
|
||||
})
|
||||
mux.HandleFunc("/good-rr", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
groups:
|
||||
- name: TestGroup
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)`))
|
||||
})
|
||||
mux.HandleFunc("/good-multi-doc", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
groups:
|
||||
- name: foo
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)
|
||||
---
|
||||
groups:
|
||||
- name: bar
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)`))
|
||||
})
|
||||
mux.HandleFunc("/bad-multi-doc", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
bad_field:
|
||||
- name: foo
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)
|
||||
---
|
||||
groups:
|
||||
- name: bar
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)`))
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
f := func(urls []string, expErr bool) {
|
||||
for i, u := range urls {
|
||||
urls[i] = srv.URL + u
|
||||
}
|
||||
_, err := Parse(urls, notifier.ValidateTemplates, true)
|
||||
if err != nil && !expErr {
|
||||
t.Fatalf("error parsing URLs %s", err)
|
||||
}
|
||||
if err == nil && expErr {
|
||||
t.Fatalf("expecting error parsing URLs but got none")
|
||||
}
|
||||
}
|
||||
|
||||
f([]string{"/good-alert", "/good-rr", "/good-multi-doc"}, false)
|
||||
f([]string{"/bad"}, true)
|
||||
f([]string{"/bad-multi-doc"}, true)
|
||||
f([]string{"/good-alert", "/bad"}, true)
|
||||
}
|
||||
|
||||
func TestParse_Success(t *testing.T) {
|
||||
_, err := Parse([]string{"testdata/rules/*good.rules", "testdata/dir/*good.*"}, notifier.ValidateTemplates, true)
|
||||
if err != nil {
|
||||
t.Fatalf("error parsing files %s", err)
|
||||
func TestParseGood(t *testing.T) {
|
||||
if _, err := Parse([]string{"testdata/rules/*good.rules", "testdata/dir/*good.*"}, notifier.ValidateTemplates, true); err != nil {
|
||||
t.Errorf("error parsing files %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParse_Failure(t *testing.T) {
|
||||
f := func(paths []string, errStrExpected string) {
|
||||
t.Helper()
|
||||
|
||||
_, err := Parse(paths, notifier.ValidateTemplates, true)
|
||||
func TestParseBad(t *testing.T) {
|
||||
testCases := []struct {
|
||||
path []string
|
||||
expErr string
|
||||
}{
|
||||
{
|
||||
[]string{"testdata/rules/rules0-bad.rules"},
|
||||
"unexpected token",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules0-bad.rules"},
|
||||
"error parsing annotation",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules1-bad.rules"},
|
||||
"duplicate in file",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules2-bad.rules"},
|
||||
"function \"unknown\" not defined",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules3-bad.rules"},
|
||||
"either `record` or `alert` must be set",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules4-bad.rules"},
|
||||
"either `record` or `alert` must be set",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/rules/rules1-bad.rules"},
|
||||
"bad graphite expr",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules6-bad.rules"},
|
||||
"missing ':' in header",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
_, err := Parse(tc.path, notifier.ValidateTemplates, true)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get error")
|
||||
t.Errorf("expected to get error")
|
||||
return
|
||||
}
|
||||
if !strings.Contains(err.Error(), errStrExpected) {
|
||||
t.Fatalf("expected err to contain %q; got %q instead", errStrExpected, err)
|
||||
if !strings.Contains(err.Error(), tc.expErr) {
|
||||
t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
||||
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
||||
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
|
||||
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
|
||||
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
|
||||
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
|
||||
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
|
||||
f([]string{"testdata/rules/rules-multi-doc-duplicates-bad.rules"}, "duplicate")
|
||||
f([]string{"http://unreachable-url"}, "failed to")
|
||||
}
|
||||
|
||||
func TestRuleValidate(t *testing.T) {
|
||||
func TestRule_Validate(t *testing.T) {
|
||||
if err := (&Rule{}).Validate(); err == nil {
|
||||
t.Fatalf("expected empty name error")
|
||||
t.Errorf("expected empty name error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
|
||||
t.Fatalf("expected empty expr error")
|
||||
t.Errorf("expected empty expr error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Fatalf("expected valid rule; got %s", err)
|
||||
t.Errorf("expected valid rule; got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroupValidate_Failure(t *testing.T) {
|
||||
f := func(group *Group, validateExpressions bool, errStrExpected string) {
|
||||
t.Helper()
|
||||
|
||||
err := group.Validate(nil, validateExpressions)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
errStr := err.Error()
|
||||
if !strings.Contains(errStr, errStrExpected) {
|
||||
t.Fatalf("missing %q in the returned error %q", errStrExpected, errStr)
|
||||
}
|
||||
}
|
||||
|
||||
f(&Group{}, false, "group name must be set")
|
||||
|
||||
f(&Group{
|
||||
Name: "both record and alert are not set",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
},
|
||||
func TestGroup_Validate(t *testing.T) {
|
||||
testCases := []struct {
|
||||
group *Group
|
||||
rules []Rule
|
||||
validateAnnotations bool
|
||||
validateExpressions bool
|
||||
expErr string
|
||||
}{
|
||||
{
|
||||
group: &Group{},
|
||||
expErr: "group name must be set",
|
||||
},
|
||||
}, false, "invalid rule")
|
||||
|
||||
f(&Group{
|
||||
Name: "negative interval",
|
||||
Interval: promutil.NewDuration(-1),
|
||||
}, false, "interval shouldn't be lower than 0")
|
||||
|
||||
f(&Group{
|
||||
Name: "wrong eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
Limit: -1,
|
||||
}, false, "invalid limit")
|
||||
|
||||
f(&Group{
|
||||
Name: "wrong concurrency",
|
||||
Concurrency: -1,
|
||||
}, false, "invalid concurrency")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test thanos",
|
||||
Type: NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, true, "unknown datasource type")
|
||||
|
||||
// validate expressions
|
||||
f(&Group{
|
||||
Name: "test prometheus expr",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite expr",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
}, true, "bad graphite expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "stats count(*) as requests"},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "_time: 1m | stats by (path, _time: 1m) count(*) as requests"},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite with prometheus expr",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
ID: 1,
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Record: "r2",
|
||||
ID: 2,
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
},
|
||||
},
|
||||
}, true, "bad graphite expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs with prometheus exp",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test prometheus with vlogs exp",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
Expr: "* | stats by (path) count()",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
f := func(group *Group, validateAnnotations, validateExpressions bool) {
|
||||
t.Helper()
|
||||
|
||||
var validateTplFn ValidateTplFn
|
||||
if validateAnnotations {
|
||||
validateTplFn = notifier.ValidateTemplates
|
||||
}
|
||||
err := group.Validate(validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, false, false)
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
}, false, false)
|
||||
|
||||
// validate annotations
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": `
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "invalid expression",
|
||||
validateExpressions: true,
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": `
|
||||
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
|
||||
{{ . | first | value | humanize1024 }}B
|
||||
{{ end }}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
validateAnnotations: true,
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test thanos",
|
||||
Type: NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "unknown datasource type",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test graphite",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test prometheus",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite inherit",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
For: promutils.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, true, false)
|
||||
|
||||
// validate expressions
|
||||
f(&Group{
|
||||
Name: "test prometheus",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite prometheus bad expr",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: promutils.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "invalid rule",
|
||||
},
|
||||
}, false, true)
|
||||
f(&Group{
|
||||
Name: "test victorialogs",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: " _time: 1m | stats count(*) as requests", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, true)
|
||||
}
|
||||
|
||||
func TestHashRule_NotEqual(t *testing.T) {
|
||||
f := func(a, b Rule) {
|
||||
t.Helper()
|
||||
|
||||
aID, bID := HashRule(a), HashRule(b)
|
||||
if aID == bID {
|
||||
t.Fatalf("rule hashes mustn't be equal; got %d", aID)
|
||||
}
|
||||
}
|
||||
|
||||
f(Rule{Alert: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 1"})
|
||||
|
||||
f(Rule{Record: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 2"})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
"foo": "baz",
|
||||
}})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
}})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}}, Rule{Alert: "alert", Expr: "up == 1"})
|
||||
}
|
||||
|
||||
func TestHashRule_Equal(t *testing.T) {
|
||||
f := func(a, b Rule) {
|
||||
t.Helper()
|
||||
|
||||
aID, bID := HashRule(a), HashRule(b)
|
||||
if aID != bID {
|
||||
t.Fatalf("rule hashes must be equal; got %d and %d", aID, bID)
|
||||
for _, tc := range testCases {
|
||||
var validateTplFn ValidateTplFn
|
||||
if tc.validateAnnotations {
|
||||
validateTplFn = notifier.ValidateTemplates
|
||||
}
|
||||
err := tc.group.Validate(validateTplFn, tc.validateExpressions)
|
||||
if err == nil {
|
||||
if tc.expErr != "" {
|
||||
t.Errorf("expected to get err %q; got nil insted", tc.expErr)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(err.Error(), tc.expErr) {
|
||||
t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(Rule{Record: "record", Expr: "up == 1"}, Rule{Record: "record", Expr: "up == 1"})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1"}, Rule{Alert: "alert", Expr: "up == 1"})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}})
|
||||
|
||||
f(Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}}, Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
"foo": "bar",
|
||||
}})
|
||||
|
||||
f(Rule{Alert: "record", Expr: "up == 1"}, Rule{Alert: "record", Expr: "up == 1"})
|
||||
|
||||
f(Rule{
|
||||
Alert: "alert", Expr: "up == 1", For: promutil.NewDuration(time.Minute), KeepFiringFor: promutil.NewDuration(time.Minute),
|
||||
}, Rule{Alert: "alert", Expr: "up == 1"})
|
||||
func TestHashRule(t *testing.T) {
|
||||
testCases := []struct {
|
||||
a, b Rule
|
||||
equal bool
|
||||
}{
|
||||
{
|
||||
Rule{Record: "record", Expr: "up == 1"},
|
||||
Rule{Record: "record", Expr: "up == 1"},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1"},
|
||||
Rule{Alert: "alert", Expr: "up == 1"},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
"foo": "bar",
|
||||
}},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "record", Expr: "up == 1"},
|
||||
Rule{Alert: "record", Expr: "up == 1"},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", For: promutils.NewDuration(time.Minute)},
|
||||
Rule{Alert: "alert", Expr: "up == 1"},
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "record", Expr: "up == 1"},
|
||||
Rule{Record: "record", Expr: "up == 1"},
|
||||
false,
|
||||
},
|
||||
{
|
||||
Rule{Record: "record", Expr: "up == 1"},
|
||||
Rule{Record: "record", Expr: "up == 2"},
|
||||
false,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
"foo": "baz",
|
||||
}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"baz": "foo",
|
||||
}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "foo",
|
||||
}},
|
||||
Rule{Alert: "alert", Expr: "up == 1"},
|
||||
false,
|
||||
},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
aID, bID := HashRule(tc.a), HashRule(tc.b)
|
||||
if tc.equal != (aID == bID) {
|
||||
t.Fatalf("missmatch for rule %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroupChecksum(t *testing.T) {
|
||||
@@ -626,24 +538,6 @@ rules:
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("`notifier_headers` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
notifier_headers:
|
||||
- "TenantID: foo"
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
notifier_headers:
|
||||
- "TenantID: bar"
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("`debug` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fslocal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fsurl"
|
||||
)
|
||||
|
||||
// FS represent a file system abstract for reading files.
|
||||
type FS interface {
|
||||
// Init initializes FS.
|
||||
Init() error
|
||||
|
||||
// String must return human-readable representation of FS.
|
||||
String() string
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
List() ([]string, error)
|
||||
|
||||
// Read returns a list of read files in form of a map
|
||||
// where key is a file name and value is a content of read file.
|
||||
// Read must be called only after the successful Init call.
|
||||
Read(files []string) (map[string][]byte, error)
|
||||
}
|
||||
|
||||
var (
|
||||
fsRegistryMu sync.Mutex
|
||||
fsRegistry = make(map[string]FS)
|
||||
)
|
||||
|
||||
// ReadFromFS parses the given path list and inits FS for each item.
|
||||
// Once initialed, ReadFromFS will try to read and return files from each FS.
|
||||
// ReadFromFS returns an error if at least one FS failed to init.
|
||||
// The function can be called multiple times but each unique path
|
||||
// will be initialed only once.
|
||||
//
|
||||
// It is allowed to mix different FS types in path list.
|
||||
func ReadFromFS(paths []string) (map[string][]byte, error) {
|
||||
var err error
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range paths {
|
||||
fsRegistryMu.Lock()
|
||||
fs, ok := fsRegistry[path]
|
||||
if !ok {
|
||||
fs, err = newFS(path)
|
||||
if err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while parsing path %q: %w", path, err)
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while initializing path %q: %w", path, err)
|
||||
}
|
||||
fsRegistry[path] = fs
|
||||
}
|
||||
fsRegistryMu.Unlock()
|
||||
|
||||
list, err := fs.List()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list files from %q", fs)
|
||||
}
|
||||
|
||||
cLogger.Infof("found %d files to read from %q", len(list), fs)
|
||||
|
||||
if len(list) < 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
ts := time.Now()
|
||||
files, err := fs.Read(list)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading files from %q: %w", fs, err)
|
||||
}
|
||||
cLogger.Infof("finished reading %d files in %v from %q", len(list), time.Since(ts), fs)
|
||||
|
||||
for k, v := range files {
|
||||
if _, ok := result[k]; ok {
|
||||
return nil, fmt.Errorf("duplicate found for file name %q: file names must be unique", k)
|
||||
}
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// newFS creates FS based on the give path.
|
||||
// Supported file systems are: fs
|
||||
func newFS(originPath string) (FS, error) {
|
||||
scheme := "fs"
|
||||
path := originPath
|
||||
n := strings.Index(path, "://")
|
||||
if n >= 0 {
|
||||
scheme = path[:n]
|
||||
path = path[n+len("://"):]
|
||||
}
|
||||
if len(path) == 0 {
|
||||
return nil, fmt.Errorf("path cannot be empty")
|
||||
}
|
||||
switch scheme {
|
||||
case "fs":
|
||||
return &fslocal.FS{Pattern: path}, nil
|
||||
case "http", "https":
|
||||
return &fsurl.FS{Path: originPath}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported scheme %q", scheme)
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewFS(t *testing.T) {
|
||||
f := func(path, expStr string) {
|
||||
t.Helper()
|
||||
fs, err := newFS(path)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if fs.String() != expStr {
|
||||
t.Fatalf("expected FS %q; got %q", expStr, fs.String())
|
||||
}
|
||||
}
|
||||
|
||||
f("/foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
f("fs:///foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
}
|
||||
|
||||
func TestNewFSNegative(t *testing.T) {
|
||||
f := func(path, expErr string) {
|
||||
t.Helper()
|
||||
_, err := newFS(path)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to have err: %s", expErr)
|
||||
}
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to have err %q; got %q instead", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "path cannot be empty")
|
||||
f("fs://", "path cannot be empty")
|
||||
f("foobar://baz", `unsupported scheme "foobar"`)
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
package fslocal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
)
|
||||
|
||||
// FS represents a local file system
|
||||
type FS struct {
|
||||
// Pattern is used for matching one or multiple files.
|
||||
// The pattern may describe hierarchical names such as
|
||||
// /usr/*/bin/ed (assuming the Separator is '/').
|
||||
Pattern string
|
||||
}
|
||||
|
||||
// Init verifies that configured Pattern is correct
|
||||
func (fs *FS) Init() error {
|
||||
_, err := doublestar.FilepathGlob(fs.Pattern)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (fs *FS) String() string {
|
||||
return fmt.Sprintf("Local FS{MatchPattern: %q}", fs.Pattern)
|
||||
}
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
func (fs *FS) List() ([]string, error) {
|
||||
matches, err := doublestar.FilepathGlob(fs.Pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while matching files via pattern %s: %w", fs.Pattern, err)
|
||||
}
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
// Read returns a map of read files where
|
||||
// key is the file name and value is file's content.
|
||||
func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range files {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading file %q: %w", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
package fsurl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// FS represents a struct which can read content from URL Path
|
||||
type FS struct {
|
||||
// Path defines the URL to read the data from
|
||||
Path string
|
||||
}
|
||||
|
||||
// Init verifies that configured Path is correct
|
||||
func (fs *FS) Init() error {
|
||||
_, err := url.Parse(fs.Path)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (fs *FS) String() string {
|
||||
return fmt.Sprintf("URL {Path: %q}", fs.Path)
|
||||
}
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
// List isn't supported by FS and reads from Path only
|
||||
func (fs *FS) List() ([]string, error) {
|
||||
return []string{fs.Path}, nil
|
||||
}
|
||||
|
||||
// Read returns a map of read files where
|
||||
// key is the file name and value is file's content.
|
||||
func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range files {
|
||||
resp, err := http.Get(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from %q: %w", path, err)
|
||||
}
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
if len(data) > 4*1024 {
|
||||
data = data[:4*1024]
|
||||
}
|
||||
return nil, fmt.Errorf("unexpected status code when fetching %q: %d, expecting %d; response: %q",
|
||||
path, resp.StatusCode, http.StatusOK, data)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// Logger is using lib/logger for logging
|
||||
// but can be suppressed via Suppress method
|
||||
type Logger struct {
|
||||
mu sync.RWMutex
|
||||
disabled bool
|
||||
}
|
||||
|
||||
// Suppress whether to ignore message logging.
|
||||
// Once suppressed, logging continues to be ignored
|
||||
// until logger is un-suppressed.
|
||||
func (l *Logger) Suppress(v bool) {
|
||||
l.mu.Lock()
|
||||
l.disabled = v
|
||||
l.mu.Unlock()
|
||||
}
|
||||
|
||||
func (l *Logger) isDisabled() bool {
|
||||
l.mu.RLock()
|
||||
defer l.mu.RUnlock()
|
||||
return l.disabled
|
||||
}
|
||||
|
||||
// Errorf logs error message.
|
||||
func (l *Logger) Errorf(format string, args ...any) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Errorf(format, args...)
|
||||
}
|
||||
|
||||
// Warnf logs warning message.
|
||||
func (l *Logger) Warnf(format string, args ...any) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Warnf(format, args...)
|
||||
}
|
||||
|
||||
// Infof logs info message.
|
||||
func (l *Logger) Infof(format string, args ...any) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Infof(format, args...)
|
||||
}
|
||||
|
||||
// Panicf logs panic message and panics.
|
||||
// Panicf can't be suppressed
|
||||
func (l *Logger) Panicf(format string, args ...any) {
|
||||
logger.Panicf(format, args...)
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
func TestOutput(t *testing.T) {
|
||||
testOutput := &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
defer logger.ResetOutputForTest()
|
||||
|
||||
log := &Logger{}
|
||||
|
||||
mustMatch := func(exp string) {
|
||||
t.Helper()
|
||||
|
||||
if exp == "" {
|
||||
if testOutput.String() != "" {
|
||||
t.Fatalf("expected output to be empty; got %q", testOutput.String())
|
||||
}
|
||||
}
|
||||
if !strings.Contains(testOutput.String(), exp) {
|
||||
t.Fatalf("output %q should contain %q", testOutput.String(), exp)
|
||||
}
|
||||
fmt.Println(testOutput.String())
|
||||
testOutput.Reset()
|
||||
}
|
||||
|
||||
log.Warnf("foo")
|
||||
mustMatch("foo")
|
||||
|
||||
log.Infof("info %d", 2)
|
||||
mustMatch("info 2")
|
||||
|
||||
log.Errorf("error %s %d", "baz", 5)
|
||||
mustMatch("error baz 5")
|
||||
|
||||
log.Suppress(true)
|
||||
|
||||
log.Warnf("foo")
|
||||
mustMatch("")
|
||||
|
||||
log.Infof("info %d", 2)
|
||||
mustMatch("")
|
||||
|
||||
log.Errorf("error %q %d", "baz", 5)
|
||||
mustMatch("")
|
||||
|
||||
}
|
||||
@@ -7,9 +7,9 @@ groups:
|
||||
labels:
|
||||
label: bar
|
||||
annotations:
|
||||
summary: "{{ }}"
|
||||
summary: "{{ $value }"
|
||||
description: "{{$labels}}"
|
||||
- alert: UnknownAnnotationsFunction
|
||||
- alert: UnkownAnnotationsFunction
|
||||
for: 5m
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
groups:
|
||||
- name: group
|
||||
rules:
|
||||
- alert: UnknownLabelFunction
|
||||
- alert: UnkownLabelFunction
|
||||
for: 5m
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
|
||||
@@ -1158,9 +1158,9 @@
|
||||
$labels.pod }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
groups:
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
invalid-field-1: invalid-value-1
|
||||
invalid-field-2: invalid-value-2
|
||||
---
|
||||
groups:
|
||||
- name: TestGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
invalid-field-2: invalid-value-2
|
||||
invalid-field-3: invalid-value-3
|
||||
@@ -1,11 +0,0 @@
|
||||
groups:
|
||||
- name: foo
|
||||
rules:
|
||||
- alert: VMRows
|
||||
expr: vm_rows > 0
|
||||
---
|
||||
groups:
|
||||
- name: foo
|
||||
rules:
|
||||
- alert: VMRows
|
||||
expr: vm_rows > 0
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
---
|
||||
groups:
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
---
|
||||
groups:
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
groups:
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
- name: groupTest-2
|
||||
rules:
|
||||
- alert: VMRows-2
|
||||
for: 1ms
|
||||
expr: vm_rows_2 > 0
|
||||
labels:
|
||||
label: bar2
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "\n markdown result is : \n---\n # header\n body: \n text \n----\n"
|
||||
---
|
||||
groups:
|
||||
- name: groupTest-3
|
||||
rules:
|
||||
- alert: VMRows-3
|
||||
for: 1ms
|
||||
expr: vm_rows_3 > 0
|
||||
labels:
|
||||
label: bar_3
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
- name: groupTest-4
|
||||
rules:
|
||||
- alert: VMRows-4
|
||||
for: 1ms
|
||||
expr: vm_rows_4 > 0
|
||||
labels:
|
||||
label: bar4
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
---
|
||||
groups:
|
||||
@@ -5,8 +5,6 @@ groups:
|
||||
limit: 1000
|
||||
headers:
|
||||
- "MyHeader: foo"
|
||||
notifier_headers:
|
||||
- "MyHeader: foo"
|
||||
params:
|
||||
denyPartialResponse: ["true"]
|
||||
rules:
|
||||
@@ -22,7 +20,6 @@ groups:
|
||||
{{ . | first | value }}
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
link: http://localhost:3000/d/wNf0q_kZk?viewPanel=51&from={{($activeAt.Add (parseDurationTime "1h")).UnixMilli}}&to={{($activeAt.Add (parseDurationTime "-1h")).UnixMilli}}
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
update_entries_limit: -1
|
||||
expr: sum by(job)
|
||||
|
||||
@@ -15,7 +15,6 @@ groups:
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
eval_delay: 30s
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
groups:
|
||||
- name: groupTest
|
||||
## default interval is 1min, eval_offset shouldn't be greater than interval
|
||||
eval_offset: 2m
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 2s
|
||||
expr: sum(rate(vm_http_request_errors_total[2s])) > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
@@ -1,10 +0,0 @@
|
||||
groups:
|
||||
- name: InvalidStatsLogsql
|
||||
type: vlogs
|
||||
interval: 5m
|
||||
rules:
|
||||
- record: MissingFilter
|
||||
expr: 'stats count(*) as requests'
|
||||
- record: MissingStatsPipe
|
||||
expr: 'service: "nginx"'
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
groups:
|
||||
- name: RequestCount
|
||||
type: vlogs
|
||||
interval: 5m
|
||||
rules:
|
||||
- record: nginxRequestCount
|
||||
expr: 'env: "test" AND service: "nginx" | stats count(*) as requests'
|
||||
annotations:
|
||||
description: "Service nginx on env test accepted {{$labels.requests}} requests in the last 5 minutes"
|
||||
- record: prodRequestCount
|
||||
expr: 'env: "prod" | stats by (service) count(*) as requests'
|
||||
annotations:
|
||||
description: "Service {{$labels.service}} on env prod accepted {{$labels.requests}} requests in the last 5 minutes"
|
||||
- name: ServiceLog
|
||||
type: vlogs
|
||||
interval: 5m
|
||||
rules:
|
||||
- alert: HasErrorLog
|
||||
expr: 'env: "prod" AND status:~"error|warn" | stats by (service) count(*) as errorLog | filter errorLog:>0'
|
||||
annotations:
|
||||
description: "Service {{$labels.service}} generated {{$labels.errorLog}} error logs in the last 5 minutes"
|
||||
- name: ServiceRequest
|
||||
type: vlogs
|
||||
interval: 10m
|
||||
rules:
|
||||
- alert: TooManyFailedRequest
|
||||
expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:!~200) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
|
||||
annotations:
|
||||
description: "Connection from address {{$labels.ip}} has {{$value}} failed requests ratio in last 10 minutes"
|
||||
@@ -4,10 +4,8 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// Type represents data source type
|
||||
@@ -29,13 +27,6 @@ func NewGraphiteType() Type {
|
||||
}
|
||||
}
|
||||
|
||||
// NewVLogsType returns victorialogs datasource type
|
||||
func NewVLogsType() Type {
|
||||
return Type{
|
||||
Name: "vlogs",
|
||||
}
|
||||
}
|
||||
|
||||
// NewRawType returns datasource type from raw string
|
||||
// without validation.
|
||||
func NewRawType(d string) Type {
|
||||
@@ -71,45 +62,32 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
if _, err := metricsql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "vlogs":
|
||||
q, err := logstorage.ParseStatsQuery(expr, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
fields, _ := q.GetStatsByFields()
|
||||
for i := range fields {
|
||||
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||
// making the result meaningless and may lead to cardinality issues.
|
||||
if fields[i] == "_time" {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
}
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SupportedType is true if given datasource type is supported
|
||||
func SupportedType(dsType string) bool {
|
||||
return dsType == "graphite" || dsType == "prometheus" || dsType == "vlogs"
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t *Type) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
func (t *Type) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
if !SupportedType(s) {
|
||||
return fmt.Errorf("unknown datasource type=%q, want prometheus, graphite or vlogs", s)
|
||||
if s == "" {
|
||||
s = "prometheus"
|
||||
}
|
||||
switch s {
|
||||
case "graphite", "prometheus":
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, "prometheus", "graphite")
|
||||
}
|
||||
t.Name = s
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t Type) MarshalYAML() (any, error) {
|
||||
func (t Type) MarshalYAML() (interface{}, error) {
|
||||
return t.Name, nil
|
||||
}
|
||||
|
||||
@@ -120,7 +98,7 @@ type Header struct {
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (h *Header) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
func (h *Header) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
|
||||
@@ -1,333 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
)
|
||||
|
||||
type datasourceType string
|
||||
|
||||
const (
|
||||
datasourcePrometheus datasourceType = "prometheus"
|
||||
datasourceGraphite datasourceType = "graphite"
|
||||
datasourceVLogs datasourceType = "vlogs"
|
||||
)
|
||||
|
||||
func toDatasourceType(s string) datasourceType {
|
||||
switch s {
|
||||
case string(datasourcePrometheus):
|
||||
return datasourcePrometheus
|
||||
case string(datasourceGraphite):
|
||||
return datasourceGraphite
|
||||
case string(datasourceVLogs):
|
||||
return datasourceVLogs
|
||||
default:
|
||||
logger.Panicf("BUG: unknown datasource type %q", s)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Client is a datasource entity for reading data,
|
||||
// supported clients are enumerated in datasourceType.
|
||||
// WARN: when adding a new field, remember to check if Clone() method needs to be updated.
|
||||
type Client struct {
|
||||
c *http.Client
|
||||
authCfg *promauth.Config
|
||||
datasourceURL string
|
||||
appendTypePrefix bool
|
||||
queryStep time.Duration
|
||||
dataSourceType datasourceType
|
||||
// ApplyIntervalAsTimeFilter is only valid for vlogs datasource.
|
||||
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression,
|
||||
// and we will add evaluation interval as an additional timeFilter when querying.
|
||||
applyIntervalAsTimeFilter bool
|
||||
|
||||
// evaluationInterval will help setting request's `step` param,
|
||||
// or adding time filter for LogsQL expression.
|
||||
evaluationInterval time.Duration
|
||||
// extraParams contains params to be attached to each HTTP request
|
||||
extraParams url.Values
|
||||
// extraHeaders are headers to be attached to each HTTP request
|
||||
extraHeaders []keyValue
|
||||
|
||||
// whether to print additional log messages
|
||||
// for each sent request
|
||||
debug bool
|
||||
}
|
||||
|
||||
type keyValue struct {
|
||||
key string
|
||||
value string
|
||||
}
|
||||
|
||||
// Clone clones shared http client and other configuration to the new client.
|
||||
func (c *Client) Clone() *Client {
|
||||
ns := &Client{
|
||||
c: c.c,
|
||||
authCfg: c.authCfg,
|
||||
datasourceURL: c.datasourceURL,
|
||||
appendTypePrefix: c.appendTypePrefix,
|
||||
queryStep: c.queryStep,
|
||||
|
||||
dataSourceType: c.dataSourceType,
|
||||
evaluationInterval: c.evaluationInterval,
|
||||
|
||||
// init map so it can be populated below
|
||||
extraParams: url.Values{},
|
||||
|
||||
debug: c.debug,
|
||||
}
|
||||
if len(c.extraHeaders) > 0 {
|
||||
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
||||
copy(ns.extraHeaders, c.extraHeaders)
|
||||
}
|
||||
for k, v := range c.extraParams {
|
||||
ns.extraParams[k] = v
|
||||
}
|
||||
|
||||
return ns
|
||||
}
|
||||
|
||||
// ApplyParams - changes given querier params.
|
||||
func (c *Client) ApplyParams(params QuerierParams) *Client {
|
||||
if params.DataSourceType != "" {
|
||||
c.dataSourceType = toDatasourceType(params.DataSourceType)
|
||||
}
|
||||
c.evaluationInterval = params.EvaluationInterval
|
||||
c.applyIntervalAsTimeFilter = params.ApplyIntervalAsTimeFilter
|
||||
if params.QueryParams != nil {
|
||||
if c.extraParams == nil {
|
||||
c.extraParams = url.Values{}
|
||||
}
|
||||
for k, vl := range params.QueryParams {
|
||||
// custom query params are prior to default ones
|
||||
if c.extraParams.Has(k) {
|
||||
c.extraParams.Del(k)
|
||||
}
|
||||
for _, v := range vl {
|
||||
// don't use .Set() instead of Del/Add since it is allowed
|
||||
// for GET params to be duplicated
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
|
||||
c.extraParams.Add(k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
if params.Headers != nil {
|
||||
for key, value := range params.Headers {
|
||||
kv := keyValue{key: key, value: value}
|
||||
c.extraHeaders = append(c.extraHeaders, kv)
|
||||
}
|
||||
}
|
||||
c.debug = params.Debug
|
||||
return c
|
||||
}
|
||||
|
||||
// BuildWithParams - implements interface.
|
||||
func (c *Client) BuildWithParams(params QuerierParams) Querier {
|
||||
return c.Clone().ApplyParams(params)
|
||||
}
|
||||
|
||||
// NewPrometheusClient returns a new prometheus datasource client.
|
||||
func NewPrometheusClient(baseURL string, authCfg *promauth.Config, appendTypePrefix bool, c *http.Client) *Client {
|
||||
return &Client{
|
||||
c: c,
|
||||
authCfg: authCfg,
|
||||
datasourceURL: strings.TrimSuffix(baseURL, "/"),
|
||||
appendTypePrefix: appendTypePrefix,
|
||||
queryStep: *queryStep,
|
||||
dataSourceType: datasourcePrometheus,
|
||||
extraParams: url.Values{},
|
||||
}
|
||||
}
|
||||
|
||||
// Query executes the given query and returns parsed response
|
||||
func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
|
||||
req, err := c.newQueryRequest(ctx, query, ts)
|
||||
if err != nil {
|
||||
return Result{}, nil, err
|
||||
}
|
||||
resp, err := c.do(req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
|
||||
// Return unexpected error to the caller.
|
||||
return Result{}, nil, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = c.newQueryRequest(ctx, query, ts)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.do(req)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusResponse
|
||||
case datasourceGraphite:
|
||||
parseFn = parseGraphiteResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
||||
}
|
||||
result, err := parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return result, req, err
|
||||
}
|
||||
|
||||
// QueryRange executes the given query on the given time range.
|
||||
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
|
||||
// Graphite type isn't supported.
|
||||
func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
|
||||
if c.dataSourceType == datasourceGraphite {
|
||||
return res, fmt.Errorf("%q is not supported for QueryRange", c.dataSourceType)
|
||||
}
|
||||
// TODO: disable range query LogsQL with time filter now
|
||||
if c.dataSourceType == datasourceVLogs && !c.applyIntervalAsTimeFilter {
|
||||
return res, fmt.Errorf("range query is not supported for LogsQL expression %q because it contains time filter. Remove time filter from the expression and try again", query)
|
||||
}
|
||||
if start.IsZero() {
|
||||
return res, fmt.Errorf("start param is missing")
|
||||
}
|
||||
if end.IsZero() {
|
||||
return res, fmt.Errorf("end param is missing")
|
||||
}
|
||||
req, err := c.newQueryRangeRequest(ctx, query, start, end)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
resp, err := c.do(req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
|
||||
// Return unexpected error to the caller.
|
||||
return res, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = c.newQueryRangeRequest(ctx, query, start, end)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.do(req)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
||||
}
|
||||
res, err = parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (c *Client) do(req *http.Request) (*http.Response, error) {
|
||||
ru := req.URL.Redacted()
|
||||
if *showDatasourceURL {
|
||||
ru = req.URL.String()
|
||||
}
|
||||
if c.debug {
|
||||
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
|
||||
}
|
||||
resp, err := c.c.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (c *Client) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
|
||||
req, err := c.newRequest(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", c.datasourceURL, err)
|
||||
}
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
c.setPrometheusRangeReqParams(req, query, start, end)
|
||||
case datasourceVLogs:
|
||||
c.setVLogsRangeReqParams(req, query, start, end)
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to create range query request", c.dataSourceType)
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (c *Client) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
|
||||
req, err := c.newRequest(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", c.datasourceURL, err)
|
||||
}
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
c.setPrometheusInstantReqParams(req, query, ts)
|
||||
case datasourceGraphite:
|
||||
c.setGraphiteReqParams(req, query)
|
||||
case datasourceVLogs:
|
||||
c.setVLogsInstantReqParams(req, query, ts)
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to create query request", c.dataSourceType)
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (c *Client) newRequest(ctx context.Context) (*http.Request, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.datasourceURL, nil)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.datasourceURL, err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.authCfg != nil {
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for _, h := range c.extraHeaders {
|
||||
req.Header.Set(h.key, h.value)
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// setReqParams adds query and other extra params for the request.
|
||||
func (c *Client) setReqParams(r *http.Request, query string) {
|
||||
q := r.URL.Query()
|
||||
for k, vs := range c.extraParams {
|
||||
if q.Has(k) { // extraParams are prior to params in URL
|
||||
q.Del(k)
|
||||
}
|
||||
for _, v := range vs {
|
||||
q.Add(k, v)
|
||||
}
|
||||
}
|
||||
q.Set("query", query)
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type graphiteResponse []graphiteResponseTarget
|
||||
|
||||
type graphiteResponseTarget struct {
|
||||
Target string `json:"target"`
|
||||
Tags map[string]string `json:"tags"`
|
||||
DataPoints [][2]float64 `json:"datapoints"`
|
||||
}
|
||||
|
||||
func (r graphiteResponse) metrics() []Metric {
|
||||
var ms []Metric
|
||||
for _, res := range r {
|
||||
if len(res.DataPoints) < 1 {
|
||||
continue
|
||||
}
|
||||
var m Metric
|
||||
// add only last value to the result.
|
||||
last := res.DataPoints[len(res.DataPoints)-1]
|
||||
m.Values = append(m.Values, last[0])
|
||||
m.Timestamps = append(m.Timestamps, int64(last[1]))
|
||||
for k, v := range res.Tags {
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
ms = append(ms, m)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
|
||||
r := &graphiteResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
return Result{Data: r.metrics()}, nil
|
||||
}
|
||||
|
||||
const (
|
||||
graphitePath = "/render"
|
||||
graphitePrefix = "/graphite"
|
||||
)
|
||||
|
||||
func (c *Client) setGraphiteReqParams(r *http.Request, query string) {
|
||||
if c.appendTypePrefix {
|
||||
r.URL.Path += graphitePrefix
|
||||
}
|
||||
r.URL.Path += graphitePath
|
||||
q := r.URL.Query()
|
||||
from := "-5min"
|
||||
q.Set("from", from)
|
||||
q.Set("format", "json")
|
||||
q.Set("target", query)
|
||||
q.Set("until", "now")
|
||||
|
||||
for k, vs := range c.extraParams {
|
||||
if q.Has(k) { // extraParams are prior to params in URL
|
||||
q.Del(k)
|
||||
}
|
||||
for _, v := range vs {
|
||||
q.Add(k, v)
|
||||
}
|
||||
}
|
||||
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
@@ -1,265 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
|
||||
"github.com/valyala/fastjson"
|
||||
)
|
||||
|
||||
var (
|
||||
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' or '/select/logsql/stats_query' path "+
|
||||
"to the configured -datasource.url and -remoteRead.url")
|
||||
disableStepParam = flag.Bool("datasource.disableStepParam", false, "Whether to disable adding 'step' param in instant queries to the configured -datasource.url and -remoteRead.url. "+
|
||||
"Only valid for prometheus datasource. "+
|
||||
"This might be useful when using vmalert with datasources that do not support 'step' param for instant queries, like Google Managed Prometheus. "+
|
||||
"It is not recommended to enable this flag if you use vmalert with VictoriaMetrics.")
|
||||
)
|
||||
|
||||
type promResponse struct {
|
||||
Status string `json:"status"`
|
||||
ErrorType string `json:"errorType"`
|
||||
Error string `json:"error"`
|
||||
Data struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result json.RawMessage `json:"result"`
|
||||
} `json:"data"`
|
||||
// Stats supported by VictoriaMetrics since v1.90
|
||||
Stats struct {
|
||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||
} `json:"stats,omitempty"`
|
||||
// IsPartial supported by VictoriaMetrics
|
||||
IsPartial *bool `json:"isPartial,omitempty"`
|
||||
}
|
||||
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
|
||||
type promInstant struct {
|
||||
// ms is populated after Unmarshal call
|
||||
ms []Metric
|
||||
}
|
||||
|
||||
// metrics returned parsed Metric slice
|
||||
// Must be called only after Unmarshal
|
||||
func (pi *promInstant) metrics() ([]Metric, error) {
|
||||
return pi.ms, nil
|
||||
}
|
||||
|
||||
var jsonParserPool fastjson.ParserPool
|
||||
|
||||
// Unmarshal unmarshals the given byte slice into promInstant
|
||||
// It is using fastjson to reduce number of allocations compared to
|
||||
// standard json.Unmarshal function.
|
||||
// Response example:
|
||||
//
|
||||
// [{"metric":{"__name__":"up","job":"prometheus"},value": [ 1435781451.781,"1"]},
|
||||
// {"metric":{"__name__":"up","job":"node"},value": [ 1435781451.781,"0"]}]
|
||||
func (pi *promInstant) Unmarshal(b []byte) error {
|
||||
var metrics []json.RawMessage
|
||||
// metrics slice could be large, so parsing it with fastjson could consume a lot of memory.
|
||||
// We parse the slice with standard lib to keep mem usage low.
|
||||
// And each metric object will be parsed with fastjson to reduce allocations.
|
||||
if err := json.Unmarshal(b, &metrics); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal metrics: %w", err)
|
||||
}
|
||||
|
||||
p := jsonParserPool.Get()
|
||||
defer jsonParserPool.Put(p)
|
||||
|
||||
pi.ms = make([]Metric, len(metrics))
|
||||
for i, data := range metrics {
|
||||
row, err := p.ParseBytes(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse metric object: %w", err)
|
||||
}
|
||||
metric := row.Get("metric")
|
||||
if metric == nil {
|
||||
return fmt.Errorf("can't find `metric` object in %q", row)
|
||||
}
|
||||
labels := metric.GetObject()
|
||||
|
||||
r := &pi.ms[i]
|
||||
r.Labels = make([]prompb.Label, 0, labels.Len())
|
||||
labels.Visit(func(key []byte, v *fastjson.Value) {
|
||||
lv, errLocal := v.StringBytes()
|
||||
if errLocal != nil {
|
||||
err = fmt.Errorf("error when parsing label value %q: %s", v, errLocal)
|
||||
return
|
||||
}
|
||||
r.Labels = append(r.Labels, prompb.Label{
|
||||
Name: string(key),
|
||||
Value: string(lv),
|
||||
})
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("error when parsing `metric` object in %q: %w", row, err)
|
||||
}
|
||||
|
||||
value := row.Get("value")
|
||||
if value == nil {
|
||||
return fmt.Errorf("can't find `value` object in %q", row)
|
||||
}
|
||||
sample := value.GetArray()
|
||||
if len(sample) != 2 {
|
||||
return fmt.Errorf("object `value` in %q should contain 2 values, but contains %d instead", row, len(sample))
|
||||
}
|
||||
r.Timestamps = []int64{sample[0].GetInt64()}
|
||||
val, err := sample[1].StringBytes()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error when parsing `value` object %q: %s", sample[1], err)
|
||||
}
|
||||
f, err := strconv.ParseFloat(bytesutil.ToUnsafeString(val), 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error when parsing float64 from %s in %q: %w", sample[1], row, err)
|
||||
}
|
||||
r.Values = []float64{f}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type promRange struct {
|
||||
Result []struct {
|
||||
Labels map[string]string `json:"metric"`
|
||||
TVs [][2]any `json:"values"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
func (r promRange) metrics() ([]Metric, error) {
|
||||
var result []Metric
|
||||
for i, res := range r.Result {
|
||||
var m Metric
|
||||
for _, tv := range res.TVs {
|
||||
f, err := strconv.ParseFloat(tv[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, tv[1], err)
|
||||
}
|
||||
m.Values = append(m.Values, f)
|
||||
m.Timestamps = append(m.Timestamps, int64(tv[0].(float64)))
|
||||
}
|
||||
if len(m.Values) < 1 || len(m.Timestamps) < 1 {
|
||||
return nil, fmt.Errorf("metric %v contains no values", res)
|
||||
}
|
||||
m.Labels = nil
|
||||
for k, v := range r.Result[i].Labels {
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
result = append(result, m)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type promScalar [2]any
|
||||
|
||||
func (r promScalar) metrics() ([]Metric, error) {
|
||||
var m Metric
|
||||
f, err := strconv.ParseFloat(r[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", r, r[1], err)
|
||||
}
|
||||
m.Values = append(m.Values, f)
|
||||
m.Timestamps = append(m.Timestamps, int64(r[0].(float64)))
|
||||
return []Metric{m}, nil
|
||||
}
|
||||
|
||||
const (
|
||||
statusSuccess, statusError = "success", "error"
|
||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||
)
|
||||
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||
r := &promResponse{}
|
||||
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||
}
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
var pi promInstant
|
||||
if err := pi.Unmarshal(r.Data.Result); err != nil {
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return res, err
|
||||
}
|
||||
parseFn = pr.metrics
|
||||
case rScalar:
|
||||
var ps promScalar
|
||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||
return res, err
|
||||
}
|
||||
parseFn = ps.metrics
|
||||
default:
|
||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||
}
|
||||
|
||||
ms, err := parseFn()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res = Result{Data: ms, IsPartial: r.IsPartial}
|
||||
if r.Stats.SeriesFetched != nil {
|
||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
||||
}
|
||||
res.SeriesFetched = &intV
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||
if c.appendTypePrefix {
|
||||
r.URL.Path += "/prometheus"
|
||||
}
|
||||
if !*disablePathAppend {
|
||||
r.URL.Path += "/api/v1/query"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
q.Set("time", timestamp.Format(time.RFC3339))
|
||||
if !*disableStepParam && c.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
|
||||
}
|
||||
if !*disableStepParam && c.queryStep > 0 { // override step with user-specified value
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(c.queryStep.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
|
||||
func (c *Client) setPrometheusRangeReqParams(r *http.Request, query string, start, end time.Time) {
|
||||
if c.appendTypePrefix {
|
||||
r.URL.Path += "/prometheus"
|
||||
}
|
||||
if !*disablePathAppend {
|
||||
r.URL.Path += "/api/v1/query_range"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
q.Add("start", start.Format(time.RFC3339))
|
||||
q.Add("end", end.Format(time.RFC3339))
|
||||
if c.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
@@ -1,844 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
var (
|
||||
ctx = context.Background()
|
||||
basicAuthName = "foo"
|
||||
basicAuthPass = "bar"
|
||||
baCfg = &promauth.BasicAuthConfig{
|
||||
Username: basicAuthName,
|
||||
Password: promauth.NewSecret(basicAuthPass),
|
||||
}
|
||||
vmQuery = "vm_rows"
|
||||
queryRender = "constantLine(10)"
|
||||
vlogsQuery = "_time: 5m | stats by (foo) count() total"
|
||||
vlogsRangeQuery = "* | stats by (foo) count() total"
|
||||
)
|
||||
|
||||
func TestVMInstantQuery(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Fatalf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Fatalf("expected POST method got %s", r.Method)
|
||||
}
|
||||
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
|
||||
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != vmQuery {
|
||||
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
|
||||
}
|
||||
timeParam := r.URL.Query().Get("time")
|
||||
if timeParam == "" {
|
||||
t.Fatalf("expected 'time' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, timeParam); err != nil {
|
||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
w.WriteHeader(500)
|
||||
case 1:
|
||||
w.Write([]byte("[]"))
|
||||
case 2:
|
||||
w.Write([]byte(`{"status":"error", "errorType":"type:", "error":"some error msg"}`))
|
||||
case 3:
|
||||
w.Write([]byte(`{"status":"unknown"}`))
|
||||
case 4:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||
case 5:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 7:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||
case 8:
|
||||
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 9:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/select/logsql/stats_query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Fatalf("expected POST method got %s", r.Method)
|
||||
}
|
||||
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
|
||||
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != vlogsQuery {
|
||||
t.Fatalf("expected %s in query param, got %s", vlogsQuery, r.URL.Query().Get("query"))
|
||||
}
|
||||
timeParam := r.URL.Query().Get("time")
|
||||
if timeParam == "" {
|
||||
t.Fatalf("expected 'time' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, timeParam); err != nil {
|
||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||
}
|
||||
switch c {
|
||||
case 10:
|
||||
w.Write([]byte("[]"))
|
||||
case 11:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
}
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
authCfg, err := baCfg.NewConfig(".")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected: %s", err)
|
||||
}
|
||||
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
|
||||
|
||||
p := datasourcePrometheus
|
||||
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(p), EvaluationInterval: 15 * time.Second})
|
||||
ts := time.Now()
|
||||
|
||||
expErr := func(query, err string) {
|
||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
}
|
||||
if !strings.Contains(gotErr.Error(), err) {
|
||||
t.Fatalf("expected err %q; got %q", err, gotErr)
|
||||
}
|
||||
}
|
||||
|
||||
expErr(vmQuery, "500") // 0
|
||||
expErr(vmQuery, "error parsing response") // 1
|
||||
expErr(vmQuery, "response error") // 2
|
||||
expErr(vmQuery, "unknown status") // 3
|
||||
expErr(vmQuery, "unexpected end of JSON input") // 4
|
||||
|
||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(res.Data) != 2 {
|
||||
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
expected := []Metric{
|
||||
{
|
||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}, {Value: "bar", Name: "foo"}},
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{13763},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{{Value: "vm_requests", Name: "__name__"}, {Value: "baz", Name: "foo"}},
|
||||
Timestamps: []int64{1583786140},
|
||||
Values: []float64{2000},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, res.Data, expected)
|
||||
|
||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if req == nil {
|
||||
t.Fatalf("expected request to be non-nil")
|
||||
}
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
expected = []Metric{
|
||||
{
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{1},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(res.Data, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
|
||||
}
|
||||
|
||||
if res.SeriesFetched != nil {
|
||||
t.Fatalf("expected `seriesFetched` field to be nil when it is missing in datasource response; got %v instead",
|
||||
res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res)
|
||||
}
|
||||
expected = []Metric{
|
||||
{
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{1},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(res.Data, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
|
||||
}
|
||||
if *res.SeriesFetched != 42 {
|
||||
t.Fatalf("expected `seriesFetched` field to be 42; got %d instead",
|
||||
*res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if res.IsPartial != nil && !*res.IsPartial {
|
||||
t.Fatalf("unexpected metric isPartial want %+v", true)
|
||||
}
|
||||
|
||||
// test graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
exp := []Metric{
|
||||
{
|
||||
Labels: []prompb.Label{{Value: "constantLine(10)", Name: "name"}},
|
||||
Timestamps: []int64{1611758403},
|
||||
Values: []float64{10},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, res.Data, exp)
|
||||
|
||||
// test victorialogs
|
||||
vlogs := datasourceVLogs
|
||||
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
||||
|
||||
expErr(vlogsQuery, "error parsing response") // 10
|
||||
|
||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(res.Data) != 2 {
|
||||
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
expected = []Metric{
|
||||
{
|
||||
Labels: []prompb.Label{{Value: "total", Name: "stats_result"}, {Value: "bar", Name: "foo"}},
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{13763},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{{Value: "total", Name: "stats_result"}, {Value: "baz", Name: "foo"}},
|
||||
Timestamps: []int64{1583786140},
|
||||
Values: []float64{2000},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, res.Data, expected)
|
||||
}
|
||||
|
||||
func TestVMInstantQueryWithRetry(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Fatalf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.URL.Query().Get("query") != vmQuery {
|
||||
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 1:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
case 2:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "2"]}}`))
|
||||
case 3:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
case 4:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
}
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
s := NewPrometheusClient(srv.URL, nil, false, srv.Client())
|
||||
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus)})
|
||||
|
||||
expErr := func(err string) {
|
||||
_, _, gotErr := pq.Query(ctx, vmQuery, time.Now())
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
}
|
||||
if !strings.Contains(gotErr.Error(), err) {
|
||||
t.Fatalf("expected err %q; got %q", err, gotErr)
|
||||
}
|
||||
}
|
||||
|
||||
expValue := func(v float64) {
|
||||
res, _, err := pq.Query(ctx, vmQuery, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := []Metric{
|
||||
{
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{v},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(m, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m, expected)
|
||||
}
|
||||
}
|
||||
|
||||
expValue(1) // 0
|
||||
expValue(2) // 1 - fail, 2 - retry
|
||||
expErr("EOF") // 3, 4 - retries
|
||||
}
|
||||
|
||||
func metricsEqual(t *testing.T, gotM, expectedM []Metric) {
|
||||
for i, exp := range expectedM {
|
||||
got := gotM[i]
|
||||
gotTS, expTS := got.Timestamps, exp.Timestamps
|
||||
if !reflect.DeepEqual(gotTS, expTS) {
|
||||
t.Fatalf("unexpected timestamps %+v want %+v", gotTS, expTS)
|
||||
}
|
||||
gotV, expV := got.Values, exp.Values
|
||||
if !reflect.DeepEqual(gotV, expV) {
|
||||
t.Fatalf("unexpected values %+v want %+v", gotV, expV)
|
||||
}
|
||||
sort.Slice(got.Labels, func(i, j int) bool {
|
||||
return got.Labels[i].Name < got.Labels[j].Name
|
||||
})
|
||||
sort.Slice(exp.Labels, func(i, j int) bool {
|
||||
return exp.Labels[i].Name < exp.Labels[j].Name
|
||||
})
|
||||
if !reflect.DeepEqual(exp.Labels, got.Labels) {
|
||||
t.Fatalf("unexpected labels %+v want %+v", got.Labels, exp.Labels)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVMRangeQuery(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Fatalf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/api/v1/query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Fatalf("expected POST method got %s", r.Method)
|
||||
}
|
||||
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
|
||||
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != vmQuery {
|
||||
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
|
||||
}
|
||||
startTS := r.URL.Query().Get("start")
|
||||
if startTS == "" {
|
||||
t.Fatalf("expected 'start' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, startTS); err != nil {
|
||||
t.Fatalf("failed to parse 'start' query param: %s", err)
|
||||
}
|
||||
endTS := r.URL.Query().Get("end")
|
||||
if endTS == "" {
|
||||
t.Fatalf("expected 'end' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, endTS); err != nil {
|
||||
t.Fatalf("failed to parse 'end' query param: %s", err)
|
||||
}
|
||||
step := r.URL.Query().Get("step")
|
||||
if step != "15s" {
|
||||
t.Fatalf("expected 'step' query param to be 15s; got %q instead", step)
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Fatalf("expected POST method got %s", r.Method)
|
||||
}
|
||||
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
|
||||
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != vlogsRangeQuery {
|
||||
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
|
||||
}
|
||||
startTS := r.URL.Query().Get("start")
|
||||
if startTS == "" {
|
||||
t.Fatalf("expected 'start' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, startTS); err != nil {
|
||||
t.Fatalf("failed to parse 'start' query param: %s", err)
|
||||
}
|
||||
endTS := r.URL.Query().Get("end")
|
||||
if endTS == "" {
|
||||
t.Fatalf("expected 'end' in query param, got nil instead")
|
||||
}
|
||||
if _, err := time.Parse(time.RFC3339, endTS); err != nil {
|
||||
t.Fatalf("failed to parse 'end' query param: %s", err)
|
||||
}
|
||||
step := r.URL.Query().Get("step")
|
||||
if step != "60s" {
|
||||
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
||||
}
|
||||
switch c {
|
||||
case 1:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
||||
}
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
authCfg, err := baCfg.NewConfig(".")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected: %s", err)
|
||||
}
|
||||
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
|
||||
|
||||
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second})
|
||||
|
||||
_, err = pq.QueryRange(ctx, vmQuery, time.Now(), time.Time{})
|
||||
expectError(t, err, "is missing")
|
||||
|
||||
_, err = pq.QueryRange(ctx, vmQuery, time.Time{}, time.Now())
|
||||
expectError(t, err, "is missing")
|
||||
|
||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||
|
||||
res, err := pq.QueryRange(ctx, vmQuery, start, end)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := Metric{
|
||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{13763},
|
||||
}
|
||||
if !reflect.DeepEqual(m[0], expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
|
||||
// test unsupported graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
_, err = gq.QueryRange(ctx, queryRender, start, end)
|
||||
expectError(t, err, "is not supported")
|
||||
|
||||
// unsupported logsql
|
||||
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second})
|
||||
|
||||
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
|
||||
expectError(t, err, "is not supported")
|
||||
|
||||
// supported logsql
|
||||
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second, ApplyIntervalAsTimeFilter: true})
|
||||
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m = res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected = Metric{
|
||||
Labels: []prompb.Label{{Value: "total", Name: "stats_result"}},
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{10},
|
||||
}
|
||||
if !reflect.DeepEqual(m[0], expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestParams(t *testing.T) {
|
||||
query := "up"
|
||||
vlogsQuery := "_time: 5m | stats count() total"
|
||||
timestamp := time.Date(2001, 2, 3, 4, 5, 6, 0, time.UTC)
|
||||
|
||||
f := func(isQueryRange bool, c *Client, checkFn func(t *testing.T, r *http.Request)) {
|
||||
t.Helper()
|
||||
|
||||
req, err := c.newRequest(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("error in newRequest: %s", err)
|
||||
}
|
||||
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
if isQueryRange {
|
||||
c.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
|
||||
} else {
|
||||
c.setPrometheusInstantReqParams(req, query, timestamp)
|
||||
}
|
||||
case datasourceGraphite:
|
||||
c.setGraphiteReqParams(req, query)
|
||||
case datasourceVLogs:
|
||||
if isQueryRange {
|
||||
c.setVLogsRangeReqParams(req, vlogsQuery, timestamp, timestamp)
|
||||
} else {
|
||||
c.setVLogsInstantReqParams(req, vlogsQuery, timestamp)
|
||||
}
|
||||
}
|
||||
|
||||
checkFn(t, req)
|
||||
}
|
||||
|
||||
authCfg, err := baCfg.NewConfig(".")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
storage := Client{
|
||||
extraParams: url.Values{"round_digits": {"10"}},
|
||||
}
|
||||
|
||||
// prometheus path
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, "/api/v1/query", r.URL.Path)
|
||||
})
|
||||
|
||||
// prometheus prefix
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
appendTypePrefix: true,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, "/prometheus/api/v1/query", r.URL.Path)
|
||||
})
|
||||
|
||||
// prometheus range path
|
||||
f(true, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, "/api/v1/query_range", r.URL.Path)
|
||||
})
|
||||
|
||||
// prometheus range prefix
|
||||
f(true, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
appendTypePrefix: true,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, "/prometheus/api/v1/query_range", r.URL.Path)
|
||||
})
|
||||
|
||||
// graphite path
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceGraphite,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, graphitePath, r.URL.Path)
|
||||
})
|
||||
|
||||
// graphite prefix
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceGraphite,
|
||||
appendTypePrefix: true,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
checkEqualString(t, graphitePrefix+graphitePath, r.URL.Path)
|
||||
})
|
||||
|
||||
// default params
|
||||
f(false, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
f(false, &Client{dataSourceType: datasourcePrometheus, applyIntervalAsTimeFilter: true}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// default range params
|
||||
f(true, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
|
||||
ts := timestamp.Format(time.RFC3339)
|
||||
exp := url.Values{"query": {query}, "start": {ts}, "end": {ts}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// basic auth
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
authCfg: authCfg,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
u, p, _ := r.BasicAuth()
|
||||
checkEqualString(t, "foo", u)
|
||||
checkEqualString(t, "bar", p)
|
||||
})
|
||||
|
||||
// basic auth range
|
||||
f(true, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
authCfg: authCfg,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
u, p, _ := r.BasicAuth()
|
||||
checkEqualString(t, "foo", u)
|
||||
checkEqualString(t, "bar", p)
|
||||
})
|
||||
|
||||
// evaluation interval
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
evaluationInterval: 15 * time.Second,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 15 * time.Second
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// step override
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
queryStep: time.Minute,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{
|
||||
"query": {query},
|
||||
"step": {fmt.Sprintf("%ds", int(time.Minute.Seconds()))},
|
||||
"time": {timestamp.Format(time.RFC3339)},
|
||||
}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// step to seconds
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
evaluationInterval: 3 * time.Hour,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 3 * time.Hour
|
||||
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// prometheus extra params
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
extraParams: url.Values{"round_digits": {"10"}},
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {query}, "round_digits": {"10"}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// prometheus extra params range
|
||||
f(true, &Client{
|
||||
dataSourceType: datasourcePrometheus,
|
||||
extraParams: url.Values{
|
||||
"nocache": {"1"},
|
||||
"max_lookback": {"1h"},
|
||||
},
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{
|
||||
"query": {query},
|
||||
"end": {timestamp.Format(time.RFC3339)},
|
||||
"start": {timestamp.Format(time.RFC3339)},
|
||||
"nocache": {"1"},
|
||||
"max_lookback": {"1h"},
|
||||
}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// custom params overrides the original params
|
||||
f(false, storage.Clone().ApplyParams(QuerierParams{
|
||||
DataSourceType: string(datasourcePrometheus),
|
||||
QueryParams: url.Values{"round_digits": {"2"}},
|
||||
}), func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {query}, "round_digits": {"2"}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// allow duplicates in query params
|
||||
f(false, storage.Clone().ApplyParams(QuerierParams{
|
||||
DataSourceType: string(datasourcePrometheus),
|
||||
QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
|
||||
}), func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {query}, "round_digits": {"10"}, "extra_labels": {"env=dev", "foo=bar"}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// graphite extra params
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceGraphite,
|
||||
extraParams: url.Values{
|
||||
"nocache": {"1"},
|
||||
"max_lookback": {"1h"},
|
||||
},
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := fmt.Sprintf("format=json&from=-5min&max_lookback=1h&nocache=1&target=%s&until=now", query)
|
||||
checkEqualString(t, exp, r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// graphite extra params allows to override from
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceGraphite,
|
||||
extraParams: url.Values{
|
||||
"from": {"-10m"},
|
||||
},
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := fmt.Sprintf("format=json&from=-10m&target=%s&until=now", query)
|
||||
checkEqualString(t, exp, r.URL.RawQuery)
|
||||
})
|
||||
|
||||
// test vlogs
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceVLogs,
|
||||
evaluationInterval: time.Minute,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
exp := url.Values{"query": {vlogsQuery}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
f(false, &Client{
|
||||
dataSourceType: datasourceVLogs,
|
||||
evaluationInterval: time.Minute,
|
||||
applyIntervalAsTimeFilter: true,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
ts := timestamp.Format(time.RFC3339)
|
||||
exp := url.Values{"query": {vlogsQuery}, "time": {ts}, "start": {timestamp.Add(-time.Minute).Format(time.RFC3339)}, "end": {ts}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
|
||||
f(true, &Client{
|
||||
dataSourceType: datasourceVLogs,
|
||||
evaluationInterval: time.Minute,
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
ts := timestamp.Format(time.RFC3339)
|
||||
exp := url.Values{"query": {vlogsQuery}, "start": {ts}, "end": {ts}, "step": {"60s"}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
})
|
||||
}
|
||||
|
||||
func TestHeaders(t *testing.T) {
|
||||
f := func(vmFn func() *Client, checkFn func(t *testing.T, r *http.Request)) {
|
||||
t.Helper()
|
||||
|
||||
vm := vmFn()
|
||||
req, err := vm.newQueryRequest(ctx, "foo", time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("error in newQueryRequest: %s", err)
|
||||
}
|
||||
checkFn(t, req)
|
||||
}
|
||||
|
||||
// basic auth
|
||||
f(func() *Client {
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
|
||||
if err != nil {
|
||||
t.Fatalf("Error get auth config: %s", err)
|
||||
}
|
||||
return NewPrometheusClient("", cfg, false, nil)
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
u, p, _ := r.BasicAuth()
|
||||
checkEqualString(t, "foo", u)
|
||||
checkEqualString(t, "bar", p)
|
||||
})
|
||||
|
||||
// bearer auth
|
||||
f(func() *Client {
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBearer("foo", ""))
|
||||
if err != nil {
|
||||
t.Fatalf("Error get auth config: %s", err)
|
||||
}
|
||||
return NewPrometheusClient("", cfg, false, nil)
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
reqToken := r.Header.Get("Authorization")
|
||||
splitToken := strings.Split(reqToken, "Bearer ")
|
||||
if len(splitToken) != 2 {
|
||||
t.Fatalf("expected two items got %d", len(splitToken))
|
||||
}
|
||||
token := splitToken[1]
|
||||
checkEqualString(t, "foo", token)
|
||||
})
|
||||
|
||||
// custom extraHeaders
|
||||
f(func() *Client {
|
||||
c := NewPrometheusClient("", nil, false, nil)
|
||||
c.extraHeaders = []keyValue{
|
||||
{key: "Foo", value: "bar"},
|
||||
{key: "Baz", value: "qux"},
|
||||
}
|
||||
return c
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
h1 := r.Header.Get("Foo")
|
||||
checkEqualString(t, "bar", h1)
|
||||
h2 := r.Header.Get("Baz")
|
||||
checkEqualString(t, "qux", h2)
|
||||
})
|
||||
|
||||
// custom header overrides basic auth
|
||||
f(func() *Client {
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
|
||||
if err != nil {
|
||||
t.Fatalf("Error get auth config: %s", err)
|
||||
}
|
||||
c := NewPrometheusClient("", cfg, false, nil)
|
||||
c.extraHeaders = []keyValue{
|
||||
{key: "Authorization", value: "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
|
||||
}
|
||||
return c
|
||||
}, func(t *testing.T, r *http.Request) {
|
||||
u, p, _ := r.BasicAuth()
|
||||
checkEqualString(t, "Aladdin", u)
|
||||
checkEqualString(t, "open sesame", p)
|
||||
})
|
||||
}
|
||||
|
||||
func checkEqualString(t *testing.T, exp, got string) {
|
||||
t.Helper()
|
||||
|
||||
if got != exp {
|
||||
t.Fatalf("expected to get: \n%q; \ngot: \n%q", exp, got)
|
||||
}
|
||||
}
|
||||
|
||||
func expectError(t *testing.T, err error, exp string) {
|
||||
t.Helper()
|
||||
|
||||
if err == nil {
|
||||
t.Fatalf("expected non-nil error")
|
||||
}
|
||||
if !strings.Contains(err.Error(), exp) {
|
||||
t.Fatalf("expected error %q to contain %q", err, exp)
|
||||
}
|
||||
}
|
||||
@@ -1,61 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (c *Client) setVLogsInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||
// there is no type path prefix in victorialogs APIs right now, ignore appendTypePrefix.
|
||||
if !*disablePathAppend {
|
||||
r.URL.Path += "/select/logsql/stats_query"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
// set `time` param explicitly, it will be used as the timestamp of query results.
|
||||
q.Set("time", timestamp.Format(time.RFC3339))
|
||||
// set the `start` and `end` params if applyIntervalAsTimeFilter is enabled(time filter is missing in the rule expr),
|
||||
// so the query will be executed in time range [timestamp - evaluationInterval, timestamp].
|
||||
if c.applyIntervalAsTimeFilter && c.evaluationInterval > 0 {
|
||||
q.Set("start", timestamp.Add(-c.evaluationInterval).Format(time.RFC3339))
|
||||
q.Set("end", timestamp.Format(time.RFC3339))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
|
||||
func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, end time.Time) {
|
||||
// there is no type path prefix in victorialogs APIs right now, ignore appendTypePrefix.
|
||||
if !*disablePathAppend {
|
||||
r.URL.Path += "/select/logsql/stats_query_range"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
q.Add("start", start.Format(time.RFC3339))
|
||||
q.Add("end", end.Format(time.RFC3339))
|
||||
// set step as evaluationInterval by default
|
||||
if c.evaluationInterval > 0 {
|
||||
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
|
||||
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusResponse(req, resp)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
for i := range res.Data {
|
||||
m := &res.Data[i]
|
||||
for j := range m.Labels {
|
||||
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
|
||||
// since there could be multiple stats results in a single query, for instance:
|
||||
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
|
||||
if m.Labels[j].Name == "__name__" {
|
||||
m.Labels[j].Name = "stats_result"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -1,15 +1,10 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// Querier interface wraps Query and QueryRange methods
|
||||
@@ -18,25 +13,11 @@ type Querier interface {
|
||||
// It returns list of Metric in response, the http.Request used for sending query
|
||||
// and error if any. Returned http.Request can't be reused and its body is already read.
|
||||
// Query should stop once ctx is cancelled.
|
||||
Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error)
|
||||
Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error)
|
||||
// QueryRange executes range request with the given query on the given time range.
|
||||
// It returns list of Metric in response and error if any.
|
||||
// QueryRange should stop once ctx is cancelled.
|
||||
QueryRange(ctx context.Context, query string, from, to time.Time) (Result, error)
|
||||
}
|
||||
|
||||
// Result represents expected response from the datasource
|
||||
type Result struct {
|
||||
// Data contains list of received Metric
|
||||
Data []Metric
|
||||
// SeriesFetched contains amount of time series processed by datasource
|
||||
// during query evaluation.
|
||||
// If nil, then this feature is not supported by the datasource.
|
||||
// SeriesFetched is supported by VictoriaMetrics since v1.90.
|
||||
SeriesFetched *int
|
||||
// IsPartial is used by VictoriaMetrics to indicate
|
||||
// whether response data is partial.
|
||||
IsPartial *bool
|
||||
QueryRange(ctx context.Context, query string, from, to time.Time) ([]Metric, error)
|
||||
}
|
||||
|
||||
// QuerierBuilder builds Querier with given params.
|
||||
@@ -47,20 +28,16 @@ type QuerierBuilder interface {
|
||||
|
||||
// QuerierParams params for Querier.
|
||||
type QuerierParams struct {
|
||||
DataSourceType string
|
||||
// ApplyIntervalAsTimeFilter is only valid for vlogs datasource.
|
||||
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression,
|
||||
// and we will add evaluation interval as an additional timeFilter when querying.
|
||||
ApplyIntervalAsTimeFilter bool
|
||||
EvaluationInterval time.Duration
|
||||
QueryParams url.Values
|
||||
Headers map[string]string
|
||||
Debug bool
|
||||
DataSourceType string
|
||||
EvaluationInterval time.Duration
|
||||
QueryParams url.Values
|
||||
Headers map[string]string
|
||||
Debug bool
|
||||
}
|
||||
|
||||
// Metric is the basic entity which should be return by datasource
|
||||
type Metric struct {
|
||||
Labels []prompb.Label
|
||||
Labels []Label
|
||||
Timestamps []int64
|
||||
Values []float64
|
||||
}
|
||||
@@ -77,9 +54,22 @@ func (m *Metric) SetLabel(key, value string) {
|
||||
m.AddLabel(key, value)
|
||||
}
|
||||
|
||||
// SetLabels sets the given map as Metric labels
|
||||
func (m *Metric) SetLabels(ls map[string]string) {
|
||||
var i int
|
||||
m.Labels = make([]Label, len(ls))
|
||||
for k, v := range ls {
|
||||
m.Labels[i] = Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
// AddLabel appends the given label to the label set
|
||||
func (m *Metric) AddLabel(key, value string) {
|
||||
m.Labels = append(m.Labels, prompb.Label{Name: key, Value: value})
|
||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
}
|
||||
|
||||
// DelLabel deletes the given label from the label set
|
||||
@@ -102,68 +92,8 @@ func (m *Metric) Label(key string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Labels is collection of Label
|
||||
type Labels []prompb.Label
|
||||
|
||||
func (ls Labels) Len() int { return len(ls) }
|
||||
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
|
||||
func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name }
|
||||
|
||||
func (ls Labels) String() string {
|
||||
var b bytes.Buffer
|
||||
|
||||
b.WriteByte('{')
|
||||
for i, l := range ls {
|
||||
if i > 0 {
|
||||
b.WriteByte(',')
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
b.WriteString(l.Name)
|
||||
b.WriteByte('=')
|
||||
b.WriteString(strconv.Quote(l.Value))
|
||||
}
|
||||
b.WriteByte('}')
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// LabelCompare return negative if a is less than b, return 0 if they are the same
|
||||
// eg.
|
||||
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "b", Value: "1"}}, return -1
|
||||
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
||||
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
||||
func LabelCompare(a, b Labels) int {
|
||||
l := len(a)
|
||||
if len(b) < l {
|
||||
l = len(b)
|
||||
}
|
||||
|
||||
for i := 0; i < l; i++ {
|
||||
if a[i].Name != b[i].Name {
|
||||
if a[i].Name < b[i].Name {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
if a[i].Value != b[i].Value {
|
||||
if a[i].Value < b[i].Value {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
}
|
||||
// if all labels so far were in common, the set with fewer labels comes first.
|
||||
return len(a) - len(b)
|
||||
}
|
||||
|
||||
// ConvertToLabels convert map to Labels
|
||||
func ConvertToLabels(m map[string]string) (labelset Labels) {
|
||||
for k, v := range m {
|
||||
labelset = append(labelset, prompb.Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
// sort label
|
||||
sort.Slice(labelset, func(i, j int) bool { return labelset[i].Name < labelset[j].Name })
|
||||
return
|
||||
// Label represents metric's label
|
||||
type Label struct {
|
||||
Name string
|
||||
Value string
|
||||
}
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FakeQuerier is a mock querier that return predefined results and error message
|
||||
type FakeQuerier struct {
|
||||
sync.Mutex
|
||||
metrics []Metric
|
||||
err error
|
||||
isPartial *bool
|
||||
}
|
||||
|
||||
// SetErr sets query error message
|
||||
func (fq *FakeQuerier) SetErr(err error) {
|
||||
fq.Lock()
|
||||
fq.err = err
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// SetPartialResponse marks query response as partial
|
||||
func (fq *FakeQuerier) SetPartialResponse(partial bool) {
|
||||
fq.Lock()
|
||||
fq.isPartial = &partial
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// Reset reset querier's error message and results
|
||||
func (fq *FakeQuerier) Reset() {
|
||||
fq.Lock()
|
||||
fq.err = nil
|
||||
fq.metrics = fq.metrics[:0]
|
||||
fq.isPartial = nil
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// Add appends metrics to querier result metrics
|
||||
func (fq *FakeQuerier) Add(metrics ...Metric) {
|
||||
fq.Lock()
|
||||
fq.metrics = append(fq.metrics, metrics...)
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// BuildWithParams return FakeQuerier itself
|
||||
func (fq *FakeQuerier) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fq
|
||||
}
|
||||
|
||||
// QueryRange performs query
|
||||
func (fq *FakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||
req, _, err := fq.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
// Query returns metrics restored in querier
|
||||
func (fq *FakeQuerier) Query(_ context.Context, _ string, _ time.Time) (Result, *http.Request, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
return Result{}, nil, fq.err
|
||||
}
|
||||
cp := make([]Metric, len(fq.metrics))
|
||||
copy(cp, fq.metrics)
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
return Result{Data: cp, IsPartial: fq.isPartial}, req, nil
|
||||
}
|
||||
|
||||
// FakeQuerierWithRegistry can store different results for different query expr
|
||||
type FakeQuerierWithRegistry struct {
|
||||
sync.Mutex
|
||||
registry map[string][]Metric
|
||||
}
|
||||
|
||||
// Set stores query result for given key
|
||||
func (fqr *FakeQuerierWithRegistry) Set(key string, metrics ...Metric) {
|
||||
fqr.Lock()
|
||||
if fqr.registry == nil {
|
||||
fqr.registry = make(map[string][]Metric)
|
||||
}
|
||||
fqr.registry[key] = metrics
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
// Reset clean querier's results registry
|
||||
func (fqr *FakeQuerierWithRegistry) Reset() {
|
||||
fqr.Lock()
|
||||
fqr.registry = nil
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
// BuildWithParams returns itself
|
||||
func (fqr *FakeQuerierWithRegistry) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fqr
|
||||
}
|
||||
|
||||
// QueryRange performs query
|
||||
func (fqr *FakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||
req, _, err := fqr.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
// Query returns metrics restored in querier registry
|
||||
func (fqr *FakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (Result, *http.Request, error) {
|
||||
fqr.Lock()
|
||||
defer fqr.Unlock()
|
||||
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
metrics, ok := fqr.registry[expr]
|
||||
if !ok {
|
||||
return Result{}, req, nil
|
||||
}
|
||||
cp := make([]Metric, len(metrics))
|
||||
copy(cp, metrics)
|
||||
return Result{Data: cp}, req, nil
|
||||
}
|
||||
|
||||
// FakeQuerierWithDelay mock querier with given delay duration
|
||||
type FakeQuerierWithDelay struct {
|
||||
FakeQuerier
|
||||
Delay time.Duration
|
||||
}
|
||||
|
||||
// Query returns query result after delay duration
|
||||
func (fqd *FakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (Result, *http.Request, error) {
|
||||
timer := time.NewTimer(fqd.Delay)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-timer.C:
|
||||
}
|
||||
return fqd.FakeQuerier.Query(ctx, expr, ts)
|
||||
}
|
||||
|
||||
// BuildWithParams returns itself
|
||||
func (fqd *FakeQuerierWithDelay) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fqd
|
||||
}
|
||||
@@ -8,18 +8,15 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect endpoint. Required parameter. "+
|
||||
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
||||
"See also -remoteRead.disablePathAppend and -datasource.showURL")
|
||||
addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. "+
|
||||
"E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL")
|
||||
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
|
||||
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to avoid stripping sensitive information such as auth headers or passwords from URLs in log messages or UI and exported metrics. "+
|
||||
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to show -datasource.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
|
||||
headers = flag.String("datasource.headers", "", "Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. "+
|
||||
@@ -39,23 +36,23 @@ var (
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", `Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used`)
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", `Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used`)
|
||||
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url")
|
||||
oauth2EndpointParams = flag.String("datasource.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -datasource.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url. ")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url.")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url. ")
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries to the configured -datasource.url and -remoteRead.url. Only valid for prometheus datasource. "+
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
idleConnectionTimeout = flag.Duration("datasource.idleConnTimeout", 50*time.Second, `Defines a duration for idle (keep-alive connections) to exist. Consider setting this value less than "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`)
|
||||
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
|
||||
`If true, disables HTTP keep-alive and will only use the connection to the server for a single HTTP request.`)
|
||||
roundDigits = flag.Int("datasource.roundDigits", 0, `Adds "round_digits" GET param to datasource requests which limits the number of digits after the decimal point in response values. `+
|
||||
`Only valid for VictoriaMetrics as the datasource.`)
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
|
||||
"Alignment supposed to produce deterministic results despite of number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
|
||||
`If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.`)
|
||||
roundDigits = flag.Int("datasource.roundDigits", 0, `Adds "round_digits" GET param to datasource requests. `+
|
||||
`In VM "round_digits" limits the number of digits after the decimal point in response values.`)
|
||||
)
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -65,11 +62,6 @@ func InitSecretFlags() {
|
||||
}
|
||||
}
|
||||
|
||||
// ShowDatasourceURL whether to show -datasource.url with sensitive information
|
||||
func ShowDatasourceURL() bool {
|
||||
return *showDatasourceURL
|
||||
}
|
||||
|
||||
// Param represents an HTTP GET param
|
||||
type Param struct {
|
||||
Key, Value string
|
||||
@@ -79,19 +71,19 @@ type Param struct {
|
||||
// Provided extraParams will be added as GET params for
|
||||
// each request.
|
||||
func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
if err := httputil.CheckURL(*addr); err != nil {
|
||||
return nil, fmt.Errorf("invalid -datasource.url: %w", err)
|
||||
if *addr == "" {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
tr, err := promauth.NewTLSTransport(*tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify, "vmalert_datasource")
|
||||
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport for -datasource.url=%q: %w", *addr, err)
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
tr.DisableKeepAlives = *disableKeepAlive
|
||||
tr.MaxIdleConnsPerHost = *maxIdleConnections
|
||||
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
|
||||
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
|
||||
}
|
||||
tr.IdleConnTimeout = *idleConnectionTimeout
|
||||
|
||||
if extraParams == nil {
|
||||
extraParams = url.Values{}
|
||||
@@ -100,29 +92,23 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
extraParams.Set("round_digits", fmt.Sprintf("%d", *roundDigits))
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := vmalertutil.AuthConfig(
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
vmalertutil.WithHeaders(*headers))
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
}
|
||||
_, err = authCfg.GetAuthHeader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
|
||||
}
|
||||
|
||||
return &Client{
|
||||
return &VMStorage{
|
||||
c: &http.Client{Transport: tr},
|
||||
authCfg: authCfg,
|
||||
datasourceURL: strings.TrimSuffix(*addr, "/"),
|
||||
appendTypePrefix: *appendTypePrefix,
|
||||
lookBack: *lookBack,
|
||||
queryStep: *queryStep,
|
||||
dataSourceType: datasourcePrometheus,
|
||||
extraParams: extraParams,
|
||||
}, nil
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user