mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-30 15:21:20 +03:00
Compare commits
187 Commits
streaming-
...
v1.93.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
26266a4639 | ||
|
|
5827ff7e30 | ||
|
|
3061790d85 | ||
|
|
003d1fecd6 | ||
|
|
3bb36bb510 | ||
|
|
1258c22881 | ||
|
|
196b5ffa65 | ||
|
|
11fb95f25b | ||
|
|
708551399c | ||
|
|
5f028e54b5 | ||
|
|
83cf46719e | ||
|
|
1cb512fd0a | ||
|
|
cf60be4a0b | ||
|
|
b08ca0b9ae | ||
|
|
cc4aff863d | ||
|
|
95407d8e9b | ||
|
|
3a831da481 | ||
|
|
d918a7267d | ||
|
|
c07c9ad028 | ||
|
|
7fef8ba908 | ||
|
|
70eead1dbc | ||
|
|
246a7758ac | ||
|
|
0a4654af3d | ||
|
|
b3f057f75a | ||
|
|
c7b3969b43 | ||
|
|
25f83e6c54 | ||
|
|
442bd6dc7a | ||
|
|
4d08a0327f | ||
|
|
248e84754d | ||
|
|
be959a5c01 | ||
|
|
ca67926952 | ||
|
|
48a851d7d1 | ||
|
|
26980cd76d | ||
|
|
18f0776ade | ||
|
|
2d4755cc1b | ||
|
|
c79ef7fbde | ||
|
|
30284e9fd4 | ||
|
|
abd3641328 | ||
|
|
3e80ace2f8 | ||
|
|
8777864167 | ||
|
|
d277977e7e | ||
|
|
25564e66e6 | ||
|
|
94a17d792d | ||
|
|
4b59047c8a | ||
|
|
fc42900eca | ||
|
|
07fe834f54 | ||
|
|
66bafee651 | ||
|
|
4542d86819 | ||
|
|
30ae1b7901 | ||
|
|
3240fed7d7 | ||
|
|
f043908156 | ||
|
|
b3b2909925 | ||
|
|
d371848c2f | ||
|
|
08ba65a728 | ||
|
|
3dd8fc9f23 | ||
|
|
e0902e4928 | ||
|
|
11998ecde9 | ||
|
|
186bfa9085 | ||
|
|
53259a7983 | ||
|
|
29c085495d | ||
|
|
52c41abaad | ||
|
|
c217fc1bc6 | ||
|
|
0e615a4263 | ||
|
|
b0de9c874a | ||
|
|
bbe78235d5 | ||
|
|
fd50f7122b | ||
|
|
53bd68440c | ||
|
|
15c491bc63 | ||
|
|
591f626a33 | ||
|
|
bfdf9f1e71 | ||
|
|
2d1cd538cb | ||
|
|
6a45349759 | ||
|
|
e27d69caeb | ||
|
|
6eee95edcf | ||
|
|
8e7996f2f1 | ||
|
|
e2f566878f | ||
|
|
6cb8a35a7d | ||
|
|
e2f74f3ba0 | ||
|
|
bdaaea6820 | ||
|
|
87eeac43cc | ||
|
|
9c22c87f18 | ||
|
|
b0a254bec9 | ||
|
|
a9ad7f45d4 | ||
|
|
236404eb1f | ||
|
|
86fd89e524 | ||
|
|
83dd5e4e72 | ||
|
|
25cf57910d | ||
|
|
eea354f5bb | ||
|
|
2e4165f551 | ||
|
|
76e34b5ac3 | ||
|
|
6177975aa7 | ||
|
|
5b4381ac14 | ||
|
|
04a6324162 | ||
|
|
5e826af497 | ||
|
|
13f40cac61 | ||
|
|
1c83e2c28c | ||
|
|
67284377f3 | ||
|
|
3362774ab2 | ||
|
|
c25fe07cd1 | ||
|
|
8be5ebe809 | ||
|
|
27a2e119cf | ||
|
|
5d726b07ae | ||
|
|
a1063a5ef3 | ||
|
|
445e4b1c73 | ||
|
|
e61b3cd612 | ||
|
|
80862282ca | ||
|
|
3acd660678 | ||
|
|
c56c2df117 | ||
|
|
84b4e6344b | ||
|
|
93bb28edb8 | ||
|
|
65a37c0113 | ||
|
|
05103d91da | ||
|
|
50ebc3a749 | ||
|
|
77f7518981 | ||
|
|
fb02fb696d | ||
|
|
aaf9c8891d | ||
|
|
a99a05ea23 | ||
|
|
919593a163 | ||
|
|
f1367236cb | ||
|
|
7385766805 | ||
|
|
be88c2c85b | ||
|
|
ecd523662f | ||
|
|
9979f6796e | ||
|
|
3efbb0af2b | ||
|
|
a14198ba97 | ||
|
|
bfbd0b478e | ||
|
|
15f9e77e4c | ||
|
|
1dd4c72e27 | ||
|
|
c9f1a573e6 | ||
|
|
88e385e273 | ||
|
|
63384291eb | ||
|
|
f36d20de08 | ||
|
|
b11d312a6b | ||
|
|
081f9e8e1c | ||
|
|
9b4b77975b | ||
|
|
db57fbc5d6 | ||
|
|
3045ba01f5 | ||
|
|
77756c7acc | ||
|
|
51e4fd652d | ||
|
|
63ca7b62e5 | ||
|
|
cad2c28c6d | ||
|
|
ca3513bdb3 | ||
|
|
ed2484ecfa | ||
|
|
1e689800c4 | ||
|
|
e8a11780ef | ||
|
|
cd5300555e | ||
|
|
15d49169fb | ||
|
|
c1f0a2b5fa | ||
|
|
b3050356eb | ||
|
|
7cd79e0eba | ||
|
|
c72570179e | ||
|
|
9133a5acb1 | ||
|
|
aabc4dc492 | ||
|
|
761510c054 | ||
|
|
c607a6126d | ||
|
|
e1dd49b739 | ||
|
|
4a6a3a1506 | ||
|
|
3665c16444 | ||
|
|
28cc553cac | ||
|
|
73d33bc96b | ||
|
|
b2019a5fac | ||
|
|
cc874daa1d | ||
|
|
e5d14ef682 | ||
|
|
6489dc387c | ||
|
|
3f33f9ce6f | ||
|
|
9fa5a07887 | ||
|
|
f9604bcf3a | ||
|
|
3d8dec3e4c | ||
|
|
7002c8d74b | ||
|
|
0459aaf8f9 | ||
|
|
f0849ff7fa | ||
|
|
7f6ae70bac | ||
|
|
c8c20b7f7a | ||
|
|
35263983a6 | ||
|
|
a2c901423b | ||
|
|
382721a3ac | ||
|
|
d688f9a744 | ||
|
|
c060c6d839 | ||
|
|
927ded6c3b | ||
|
|
d4123e135f | ||
|
|
4b86a18105 | ||
|
|
c6154f8f52 | ||
|
|
b4c79fc606 | ||
|
|
b4529df08d | ||
|
|
a63fb21ab2 | ||
|
|
7a19b2a14c | ||
|
|
e06d855636 |
4
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
4
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -6,7 +6,7 @@ body:
|
||||
attributes:
|
||||
value: |
|
||||
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
|
||||
- type: textarea
|
||||
@@ -60,7 +60,7 @@ body:
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/)
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
|
||||
24
.github/workflows/check-licenses.yml
vendored
24
.github/workflows/check-licenses.yml
vendored
@@ -14,25 +14,13 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Check License
|
||||
run: make check-licenses
|
||||
run: |
|
||||
make check-licenses
|
||||
|
||||
2
.github/workflows/codeql-analysis-js.yml
vendored
2
.github/workflows/codeql-analysis-js.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
|
||||
19
.github/workflows/codeql-analysis.yml
vendored
19
.github/workflows/codeql-analysis.yml
vendored
@@ -52,25 +52,14 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
|
||||
100
.github/workflows/main.yml
vendored
100
.github/workflows/main.yml
vendored
@@ -7,8 +7,6 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
@@ -16,8 +14,6 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -31,58 +27,21 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-all-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-all-
|
||||
|
||||
- name: Run check-all
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golangci-lint
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
build:
|
||||
needs: lint
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-crossbuild-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-crossbuild-
|
||||
|
||||
- name: Build
|
||||
run: make crossbuild
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
@@ -92,29 +51,46 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: run tests
|
||||
run: make ${{ matrix.scenario}}
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: stable
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
19
.github/workflows/sync-docs.yml
vendored
19
.github/workflows/sync-docs.yml
vendored
@@ -6,9 +6,6 @@ on:
|
||||
paths:
|
||||
- 'docs/**'
|
||||
workflow_dispatch: {}
|
||||
env:
|
||||
PAGEFIND_VERSION: "1.0.4"
|
||||
HUGO_VERSION: "latest"
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout and to commit back image update
|
||||
deployments: write
|
||||
@@ -18,25 +15,16 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
path: main
|
||||
- name: Checkout private code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: VictoriaMetrics/vmdocs
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
path: docs
|
||||
- uses: peaceiris/actions-hugo@v2
|
||||
with:
|
||||
hugo-version: ${{env.HUGO_VERSION}}
|
||||
extended: true
|
||||
- name: Install PageFind #install the static search engine for index build
|
||||
uses: supplypike/setup-bin@v3
|
||||
with:
|
||||
uri: "https://github.com/CloudCannon/pagefind/releases/download/v${{env.PAGEFIND_VERSION}}/pagefind-v${{env.PAGEFIND_VERSION}}-x86_64-unknown-linux-musl.tar.gz"
|
||||
name: "pagefind"
|
||||
version: ${{env.PAGEFIND_VERSION}}
|
||||
|
||||
- name: Import GPG key
|
||||
uses: crazy-max/ghaction-import-gpg@v5
|
||||
with:
|
||||
@@ -57,7 +45,6 @@ jobs:
|
||||
rm -rf content
|
||||
cp -r ../main/docs content
|
||||
make clean-after-copy
|
||||
make build-search-index
|
||||
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
|
||||
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
|
||||
git add .
|
||||
|
||||
80
.github/workflows/update-sandbox.yml
vendored
Normal file
80
.github/workflows/update-sandbox.yml
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
name: sandbox-release
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
permissions:
|
||||
contents: write
|
||||
jobs:
|
||||
deploy-sandbox:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check inputs
|
||||
if: github.event.release.tag_name == ''
|
||||
run: exit 1
|
||||
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: VictoriaMetrics/ops
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
|
||||
- name: Import GPG key
|
||||
id: import-gpg
|
||||
uses: crazy-max/ghaction-import-gpg@v5
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
||||
passphrase: ${{ secrets.VM_BOT_PASSPHRASE }}
|
||||
git_user_signingkey: true
|
||||
git_commit_gpgsign: true
|
||||
|
||||
- name: update image tag
|
||||
uses: fjogeleit/yaml-update-action@main
|
||||
with:
|
||||
valueFile: 'gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml'
|
||||
commitChange: false
|
||||
createPR: false
|
||||
changes: |
|
||||
{
|
||||
"gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml": {
|
||||
"spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
|
||||
"spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
|
||||
"spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
|
||||
},
|
||||
"gcp-test/sandbox/manifests/benchmark-vm/vmsingle.yaml": {
|
||||
"spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
|
||||
},
|
||||
"gcp-test/sandbox/manifests/monitoring/monitoring-vmagent.yaml": {
|
||||
"spec.image.tag": "${{ github.event.release.tag_name }}"
|
||||
},
|
||||
"gcp-test/sandbox/manifests/monitoring/monitoring-vmcluster.yaml": {
|
||||
"spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
|
||||
"spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
|
||||
"spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
|
||||
},
|
||||
"gcp-test/sandbox/manifests/monitoring/vmalert.yaml": {
|
||||
"spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
|
||||
}
|
||||
}
|
||||
|
||||
- name: commit changes
|
||||
run: |
|
||||
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
|
||||
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
|
||||
git add .
|
||||
git commit -S -m "Deploy image tag ${RELEASE_TAG} to sandbox"
|
||||
env:
|
||||
RELEASE_TAG: ${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
|
||||
branch: release-automation
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
delete-branch: true
|
||||
title: "release ${{ github.event.release.tag_name }}"
|
||||
body: |
|
||||
Release [${{ github.event.release.tag_name }}](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/${{ github.event.release.tag_name }}) to sandbox
|
||||
|
||||
> Auto-generated by `Github Actions Bot`
|
||||
|
||||
77
Makefile
77
Makefile
@@ -1,7 +1,5 @@
|
||||
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
MAKE_CONCURRENCY ?= $(shell cat /proc/cpuinfo | grep -c processor)
|
||||
MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
@@ -17,9 +15,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
include app/*/Makefile
|
||||
include docs/Makefile
|
||||
include deployment/*/Makefile
|
||||
include dashboards/Makefile
|
||||
include snap/local/Makefile
|
||||
include package/release/Makefile
|
||||
|
||||
@@ -28,7 +24,6 @@ all: \
|
||||
victoria-logs-prod \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmalert-tool-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod \
|
||||
@@ -37,11 +32,10 @@ all: \
|
||||
clean:
|
||||
rm -rf bin/*
|
||||
|
||||
publish: \
|
||||
publish: package-base \
|
||||
publish-victoria-metrics \
|
||||
publish-vmagent \
|
||||
publish-vmalert \
|
||||
publish-vmalert-tool \
|
||||
publish-vmauth \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore \
|
||||
@@ -52,7 +46,6 @@ package: \
|
||||
package-victoria-logs \
|
||||
package-vmagent \
|
||||
package-vmalert \
|
||||
package-vmalert-tool \
|
||||
package-vmauth \
|
||||
package-vmbackup \
|
||||
package-vmrestore \
|
||||
@@ -61,7 +54,6 @@ package: \
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmalert-tool \
|
||||
vmauth \
|
||||
vmbackup \
|
||||
vmrestore \
|
||||
@@ -70,7 +62,6 @@ vmutils: \
|
||||
vmutils-pure: \
|
||||
vmagent-pure \
|
||||
vmalert-pure \
|
||||
vmalert-tool-pure \
|
||||
vmauth-pure \
|
||||
vmbackup-pure \
|
||||
vmrestore-pure \
|
||||
@@ -79,7 +70,6 @@ vmutils-pure: \
|
||||
vmutils-linux-amd64: \
|
||||
vmagent-linux-amd64 \
|
||||
vmalert-linux-amd64 \
|
||||
vmalert-tool-linux-amd64 \
|
||||
vmauth-linux-amd64 \
|
||||
vmbackup-linux-amd64 \
|
||||
vmrestore-linux-amd64 \
|
||||
@@ -88,7 +78,6 @@ vmutils-linux-amd64: \
|
||||
vmutils-linux-arm64: \
|
||||
vmagent-linux-arm64 \
|
||||
vmalert-linux-arm64 \
|
||||
vmalert-tool-linux-arm64 \
|
||||
vmauth-linux-arm64 \
|
||||
vmbackup-linux-arm64 \
|
||||
vmrestore-linux-arm64 \
|
||||
@@ -97,7 +86,6 @@ vmutils-linux-arm64: \
|
||||
vmutils-linux-arm: \
|
||||
vmagent-linux-arm \
|
||||
vmalert-linux-arm \
|
||||
vmalert-tool-linux-arm \
|
||||
vmauth-linux-arm \
|
||||
vmbackup-linux-arm \
|
||||
vmrestore-linux-arm \
|
||||
@@ -106,7 +94,6 @@ vmutils-linux-arm: \
|
||||
vmutils-linux-386: \
|
||||
vmagent-linux-386 \
|
||||
vmalert-linux-386 \
|
||||
vmalert-tool-linux-386 \
|
||||
vmauth-linux-386 \
|
||||
vmbackup-linux-386 \
|
||||
vmrestore-linux-386 \
|
||||
@@ -115,7 +102,6 @@ vmutils-linux-386: \
|
||||
vmutils-linux-ppc64le: \
|
||||
vmagent-linux-ppc64le \
|
||||
vmalert-linux-ppc64le \
|
||||
vmalert-tool-linux-ppc64le \
|
||||
vmauth-linux-ppc64le \
|
||||
vmbackup-linux-ppc64le \
|
||||
vmrestore-linux-ppc64le \
|
||||
@@ -124,7 +110,6 @@ vmutils-linux-ppc64le: \
|
||||
vmutils-darwin-amd64: \
|
||||
vmagent-darwin-amd64 \
|
||||
vmalert-darwin-amd64 \
|
||||
vmalert-tool-darwin-amd64 \
|
||||
vmauth-darwin-amd64 \
|
||||
vmbackup-darwin-amd64 \
|
||||
vmrestore-darwin-amd64 \
|
||||
@@ -133,7 +118,6 @@ vmutils-darwin-amd64: \
|
||||
vmutils-darwin-arm64: \
|
||||
vmagent-darwin-arm64 \
|
||||
vmalert-darwin-arm64 \
|
||||
vmalert-tool-darwin-arm64 \
|
||||
vmauth-darwin-arm64 \
|
||||
vmbackup-darwin-arm64 \
|
||||
vmrestore-darwin-arm64 \
|
||||
@@ -142,7 +126,6 @@ vmutils-darwin-arm64: \
|
||||
vmutils-freebsd-amd64: \
|
||||
vmagent-freebsd-amd64 \
|
||||
vmalert-freebsd-amd64 \
|
||||
vmalert-tool-freebsd-amd64 \
|
||||
vmauth-freebsd-amd64 \
|
||||
vmbackup-freebsd-amd64 \
|
||||
vmrestore-freebsd-amd64 \
|
||||
@@ -151,7 +134,6 @@ vmutils-freebsd-amd64: \
|
||||
vmutils-openbsd-amd64: \
|
||||
vmagent-openbsd-amd64 \
|
||||
vmalert-openbsd-amd64 \
|
||||
vmalert-tool-openbsd-amd64 \
|
||||
vmauth-openbsd-amd64 \
|
||||
vmbackup-openbsd-amd64 \
|
||||
vmrestore-openbsd-amd64 \
|
||||
@@ -160,15 +142,11 @@ vmutils-openbsd-amd64: \
|
||||
vmutils-windows-amd64: \
|
||||
vmagent-windows-amd64 \
|
||||
vmalert-windows-amd64 \
|
||||
vmalert-tool-windows-amd64 \
|
||||
vmauth-windows-amd64 \
|
||||
vmbackup-windows-amd64 \
|
||||
vmrestore-windows-amd64 \
|
||||
vmctl-windows-amd64
|
||||
|
||||
crossbuild:
|
||||
$(MAKE_PARALLEL) victoria-metrics-crossbuild vmutils-crossbuild
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-amd64 \
|
||||
@@ -194,15 +172,14 @@ vmutils-crossbuild: \
|
||||
|
||||
publish-release:
|
||||
rm -rf bin/*
|
||||
git checkout $(TAG) && $(MAKE) release && LATEST_TAG=stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release && LATEST_TAG=cluster-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release && LATEST_TAG=enterprise-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release && LATEST_TAG=enterprise-cluster-stable $(MAKE) publish
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
|
||||
release:
|
||||
$(MAKE_PARALLEL) \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-386 \
|
||||
@@ -261,16 +238,16 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
|
||||
cd bin && rm -rf \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe
|
||||
|
||||
release-victoria-logs:
|
||||
$(MAKE_PARALLEL) release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
release-victoria-logs: \
|
||||
release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
|
||||
release-victoria-logs-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch
|
||||
@@ -359,7 +336,6 @@ release-vmutils-windows-amd64:
|
||||
release-vmutils-goos-goarch: \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
@@ -368,7 +344,6 @@ release-vmutils-goos-goarch: \
|
||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
@@ -376,7 +351,6 @@ release-vmutils-goos-goarch: \
|
||||
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
@@ -385,7 +359,6 @@ release-vmutils-goos-goarch: \
|
||||
cd bin && rm -rf \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
@@ -394,7 +367,6 @@ release-vmutils-goos-goarch: \
|
||||
release-vmutils-windows-goarch: \
|
||||
vmagent-windows-$(GOARCH)-prod \
|
||||
vmalert-windows-$(GOARCH)-prod \
|
||||
vmalert-tool-windows-$(GOARCH)-prod \
|
||||
vmauth-windows-$(GOARCH)-prod \
|
||||
vmbackup-windows-$(GOARCH)-prod \
|
||||
vmrestore-windows-$(GOARCH)-prod \
|
||||
@@ -403,7 +375,6 @@ release-vmutils-windows-goarch: \
|
||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
@@ -411,7 +382,6 @@ release-vmutils-windows-goarch: \
|
||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
@@ -420,7 +390,6 @@ release-vmutils-windows-goarch: \
|
||||
cd bin && rm -rf \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
@@ -465,7 +434,7 @@ benchmark-pure:
|
||||
vendor-update:
|
||||
go get -u -d ./lib/...
|
||||
go get -u -d ./app/...
|
||||
go mod tidy -compat=1.20
|
||||
go mod tidy -compat=1.21
|
||||
go mod vendor
|
||||
|
||||
app-local:
|
||||
@@ -534,3 +503,11 @@ copy-docs:
|
||||
docs-sync:
|
||||
SRC=README.md DST=docs/README.md OLD_URL='' ORDER=0 TITLE=VictoriaMetrics $(MAKE) copy-docs
|
||||
SRC=README.md DST=docs/Single-server-VictoriaMetrics.md OLD_URL='/Single-server-VictoriaMetrics.html' TITLE=VictoriaMetrics ORDER=1 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md OLD_URL='/vmagent.html' ORDER=3 TITLE=vmagent $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md OLD_URL='/vmalert.html' ORDER=4 TITLE=vmalert $(MAKE) copy-docs
|
||||
SRC=app/vmauth/README.md DST=docs/vmauth.md OLD_URL='/vmauth.html' ORDER=5 TITLE=vmauth $(MAKE) copy-docs
|
||||
SRC=app/vmbackup/README.md DST=docs/vmbackup.md OLD_URL='/vmbackup.html' ORDER=6 TITLE=vmbackup $(MAKE) copy-docs
|
||||
SRC=app/vmrestore/README.md DST=docs/vmrestore.md OLD_URL='/vmrestore.html' ORDER=7 TITLE=vmrestore $(MAKE) copy-docs
|
||||
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
|
||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
|
||||
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -119,7 +119,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"expand-with-exprs", "WITH expressions' tutorial"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"stream-agg", "streaming aggregation status"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"api/v1/status/tsdb", "tsdb status page"},
|
||||
|
||||
@@ -7,12 +7,12 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -55,14 +55,15 @@ var (
|
||||
)
|
||||
|
||||
type test struct {
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
Issue string `json:"issue"`
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
ResultQueryRange QueryRange `json:"result_query_range"`
|
||||
Issue string `json:"issue"`
|
||||
}
|
||||
|
||||
type Metric struct {
|
||||
@@ -80,90 +81,42 @@ type Series struct {
|
||||
Status string `json:"status"`
|
||||
Data []map[string]string `json:"data"`
|
||||
}
|
||||
|
||||
type Query struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result json.RawMessage `json:"result"`
|
||||
} `json:"data"`
|
||||
Status string `json:"status"`
|
||||
Data QueryData `json:"data"`
|
||||
}
|
||||
type QueryData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryDataResult `json:"result"`
|
||||
}
|
||||
|
||||
const rtVector, rtMatrix = "vector", "matrix"
|
||||
|
||||
func (q *Query) metrics() ([]Metric, error) {
|
||||
switch q.Data.ResultType {
|
||||
case rtVector:
|
||||
var r QueryInstant
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
case rtMatrix:
|
||||
var r QueryRange
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown result type %q", q.Data.ResultType)
|
||||
}
|
||||
type QueryDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Value []interface{} `json:"value"`
|
||||
}
|
||||
|
||||
type QueryInstant struct {
|
||||
Result []struct {
|
||||
Labels map[string]string `json:"metric"`
|
||||
TV [2]interface{} `json:"value"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
func (q QueryInstant) metrics() ([]Metric, error) {
|
||||
result := make([]Metric, len(q.Result))
|
||||
for i, res := range q.Result {
|
||||
f, err := strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
var m Metric
|
||||
m.Metric = res.Labels
|
||||
m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
|
||||
m.Values = append(m.Values, f)
|
||||
result[i] = m
|
||||
}
|
||||
return result, nil
|
||||
func (r *QueryDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
}
|
||||
|
||||
type QueryRange struct {
|
||||
Result []struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
} `json:"result"`
|
||||
Status string `json:"status"`
|
||||
Data QueryRangeData `json:"data"`
|
||||
}
|
||||
type QueryRangeData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryRangeDataResult `json:"result"`
|
||||
}
|
||||
|
||||
func (q QueryRange) metrics() ([]Metric, error) {
|
||||
var result []Metric
|
||||
for i, res := range q.Result {
|
||||
var m Metric
|
||||
for _, tv := range res.Values {
|
||||
f, err := strconv.ParseFloat(tv[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, tv[1], err)
|
||||
}
|
||||
m.Values = append(m.Values, f)
|
||||
m.Timestamps = append(m.Timestamps, int64(tv[0].(float64)))
|
||||
}
|
||||
if len(m.Values) < 1 || len(m.Timestamps) < 1 {
|
||||
return nil, fmt.Errorf("metric %v contains no values", res)
|
||||
}
|
||||
m.Metric = q.Result[i].Metric
|
||||
result = append(result, m)
|
||||
}
|
||||
return result, nil
|
||||
type QueryRangeDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
}
|
||||
|
||||
func (q *Query) UnmarshalJSON(b []byte) error {
|
||||
type plain Query
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(q))
|
||||
func (r *QueryRangeDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryRangeDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
@@ -245,9 +198,6 @@ func TestWriteRead(t *testing.T) {
|
||||
func testWrite(t *testing.T) {
|
||||
t.Run("prometheus", func(t *testing.T) {
|
||||
for _, test := range readIn("prometheus", t, insertionTime) {
|
||||
if test.Data == nil {
|
||||
continue
|
||||
}
|
||||
s := newSuite(t)
|
||||
r := testutil.WriteRequest{}
|
||||
s.noError(json.Unmarshal([]byte(strings.Join(test.Data, "\n")), &r.Timeseries))
|
||||
@@ -323,19 +273,17 @@ func testRead(t *testing.T) {
|
||||
if err := checkSeriesResult(s, test.ResultSeries); err != nil {
|
||||
t.Fatalf("Series. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query_range"):
|
||||
queryResult := QueryRange{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryRangeResult(queryResult, test.ResultQueryRange); err != nil {
|
||||
t.Fatalf("Query Range. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query"):
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
gotMetrics, err := queryResult.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse query response: %s", err)
|
||||
}
|
||||
expMetrics, err := test.ResultQuery.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse expected response: %s", err)
|
||||
}
|
||||
if err := checkMetricsResult(gotMetrics, expMetrics); err != nil {
|
||||
t.Fatalf("%q fails with error %s.%s", q, err, test.Issue)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
@@ -413,6 +361,7 @@ func httpReadMetrics(t *testing.T, address, query string) []Metric {
|
||||
}
|
||||
return rows
|
||||
}
|
||||
|
||||
func httpReadStruct(t *testing.T, address, query string, dst interface{}) {
|
||||
t.Helper()
|
||||
s := newSuite(t)
|
||||
@@ -470,6 +419,60 @@ func removeIfFoundSeries(r map[string]string, contains []map[string]string) []ma
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryResult(got, want Query) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryData(r QueryDataResult, contains []QueryDataResult) []QueryDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Value[0], item.Value[0]) && reflect.DeepEqual(r.Value[1], item.Value[1]) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryRangeResult(got, want QueryRange) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryRangeDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryRangeData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query range result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryRangeData(r QueryRangeDataResult, contains []QueryRangeDataResult) []QueryRangeDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Values, item.Values) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
type suite struct{ t *testing.T }
|
||||
|
||||
func newSuite(t *testing.T) *suite { return &suite{t: t} }
|
||||
@@ -497,3 +500,73 @@ func (s *suite) greaterThan(a, b int) {
|
||||
s.t.FailNow()
|
||||
}
|
||||
}
|
||||
|
||||
func TestImportJSONLines(t *testing.T) {
|
||||
f := func(labelsCount, labelLen int) {
|
||||
t.Helper()
|
||||
|
||||
reqURL := fmt.Sprintf("http://localhost%s/api/v1/import", testHTTPListenAddr)
|
||||
line := generateJSONLine(labelsCount, labelLen)
|
||||
req, err := http.NewRequest("POST", reqURL, bytes.NewBufferString(line))
|
||||
if err != nil {
|
||||
t.Fatalf("cannot create request: %s", err)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot perform request for labelsCount=%d, labelLen=%d: %s", labelsCount, labelLen, err)
|
||||
}
|
||||
if resp.StatusCode != 204 {
|
||||
t.Fatalf("unexpected statusCode for labelsCount=%d, labelLen=%d; got %d; want 204", labelsCount, labelLen, resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// labels with various lengths
|
||||
for i := 0; i < 500; i++ {
|
||||
f(10, i*5)
|
||||
}
|
||||
|
||||
// Too many labels
|
||||
f(1000, 100)
|
||||
|
||||
// Too long labels
|
||||
f(1, 100_000)
|
||||
f(10, 100_000)
|
||||
f(10, 10_000)
|
||||
}
|
||||
|
||||
func generateJSONLine(labelsCount, labelLen int) string {
|
||||
m := make(map[string]string, labelsCount)
|
||||
m["__name__"] = generateSizedRandomString(labelLen)
|
||||
for j := 1; j < labelsCount; j++ {
|
||||
labelName := generateSizedRandomString(labelLen)
|
||||
labelValue := generateSizedRandomString(labelLen)
|
||||
m[labelName] = labelValue
|
||||
}
|
||||
|
||||
type jsonLine struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values []float64 `json:"values"`
|
||||
Timestamps []int64 `json:"timestamps"`
|
||||
}
|
||||
line := &jsonLine{
|
||||
Metric: m,
|
||||
Values: []float64{1.34},
|
||||
Timestamps: []int64{time.Now().UnixNano() / 1e6},
|
||||
}
|
||||
data, err := json.Marshal(&line)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("cannot marshal JSON: %w", err))
|
||||
}
|
||||
data = append(data, '\n')
|
||||
return string(data)
|
||||
}
|
||||
|
||||
const alphabetSample = `qwertyuiopasdfghjklzxcvbnm`
|
||||
|
||||
func generateSizedRandomString(size int) string {
|
||||
dst := make([]byte, size)
|
||||
for i := range dst {
|
||||
dst[i] = alphabetSample[rand.Intn(len(alphabetSample))]
|
||||
}
|
||||
return string(dst)
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
@@ -49,16 +50,8 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
var mrs []storage.MetricRow
|
||||
var labels []prompb.Label
|
||||
t := time.NewTicker(scrapeInterval)
|
||||
var currentTimestamp int64
|
||||
for {
|
||||
select {
|
||||
case <-selfScraperStopCh:
|
||||
t.Stop()
|
||||
logger.Infof("stopped self-scraping `/metrics` page")
|
||||
return
|
||||
case currentTime := <-t.C:
|
||||
currentTimestamp = currentTime.UnixNano() / 1e6
|
||||
}
|
||||
f := func(currentTime time.Time, sendStaleMarkers bool) {
|
||||
currentTimestamp := currentTime.UnixNano() / 1e6
|
||||
bb.Reset()
|
||||
appmetrics.WritePrometheusMetrics(&bb)
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
@@ -83,12 +76,27 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
mr := &mrs[len(mrs)-1]
|
||||
mr.MetricNameRaw = storage.MarshalMetricNameRaw(mr.MetricNameRaw[:0], labels)
|
||||
mr.Timestamp = currentTimestamp
|
||||
mr.Value = r.Value
|
||||
if sendStaleMarkers {
|
||||
mr.Value = decimal.StaleNaN
|
||||
} else {
|
||||
mr.Value = r.Value
|
||||
}
|
||||
}
|
||||
if err := vmstorage.AddRows(mrs); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-selfScraperStopCh:
|
||||
f(time.Now(), true)
|
||||
t.Stop()
|
||||
logger.Infof("stopped self-scraping `/metrics` page")
|
||||
return
|
||||
case currentTime := <-t.C:
|
||||
f(currentTime, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
|
||||
@@ -98,7 +106,7 @@ func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
|
||||
dst = append(dst, prompb.Label{})
|
||||
}
|
||||
lb := &dst[len(dst)-1]
|
||||
lb.Name = key
|
||||
lb.Value = value
|
||||
lb.Name = bytesutil.ToUnsafeBytes(key)
|
||||
lb.Value = bytesutil.ToUnsafeBytes(value)
|
||||
return dst
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"not_nan_not_inf;item=y 3 {TIME_S-1m}",
|
||||
"not_nan_not_inf;item=y 1 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query_range?query=1/(not_nan_not_inf-1)!=inf!=nan&start={TIME_S-3m}&end={TIME_S}&step=60"],
|
||||
"result_query": {
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"empty_label_match;foo=bar 2 {TIME_S-1m}",
|
||||
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S-1m}&end={TIME_S}&step=60"],
|
||||
"result_query": {
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_set 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_set&start={TIME_S-150s}&end={TIME_S}&step=10s&max_lookback=1s"],
|
||||
"result_query": {
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_set"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_unset 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_unset&start={TIME_S-150s}&end={TIME_S}&step=10s"],
|
||||
"result_query": {
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_unset"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"not_nan_as_missing_data;item=y 3 {TIME_S-1m}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=not_nan_as_missing_data>1&start={TIME_S-2m}&end={TIME_S}&step=60"],
|
||||
"result_query": {
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"name": "instant query with look-behind window",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo[5m]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"name": "instant scalar query",
|
||||
"query": ["/api/v1/query?query=42&time={TIME_S}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"vector",
|
||||
"result":[{"metric":{},"value":["{TIME_S}", "42"]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"name": "too big look-behind window",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"},{\"name\":\"issue\",\"value\":\"5553\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo{issue=\"5553\"}[100y]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo", "issue": "5553"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"name": "query range",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"bar\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}, {\"value\":2,\"timestamp\":\"{TIME_MS-120s}\"}, {\"value\":1,\"timestamp\":\"{TIME_MS-180s}\"}]}]"],
|
||||
"query": ["/api/v1/query_range?query=bar&step=30s&start={TIME_MS-180s}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[
|
||||
{
|
||||
"metric":{"__name__":"bar"},
|
||||
"values":[["{TIME_S-180s}", "1"],["{TIME_S-150s}", "1"],["{TIME_S-120s}", "2"],["{TIME_S-90s}", "2"], ["{TIME_S-60s}", "1"], ["{TIME_S-30s}", "1"], ["{TIME_S}", "1"]]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -12,8 +12,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
|
||||
@@ -24,6 +22,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -94,32 +93,22 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
if err := vlstorage.CanWriteData(); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
||||
processLogMessage := cp.GetProcessLogMessageFunc(lr)
|
||||
isGzip := r.Header.Get("Content-Encoding") == "gzip"
|
||||
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgField, processLogMessage)
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
if err != nil {
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
|
||||
return true
|
||||
}
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
|
||||
tookMs := time.Since(startTime).Milliseconds()
|
||||
bw := bufferedwriter.Get(w)
|
||||
defer bufferedwriter.Put(bw)
|
||||
WriteBulkResponse(bw, n, tookMs)
|
||||
_ = bw.Flush()
|
||||
|
||||
// update bulkRequestDuration only for successfully parsed requests
|
||||
// There is no need in updating bulkRequestDuration for request errors,
|
||||
// since their timings are usually much smaller than the timing for successful request parsing.
|
||||
bulkRequestDuration.UpdateDuration(startTime)
|
||||
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
@@ -127,9 +116,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
var (
|
||||
bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
|
||||
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
|
||||
bulkRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}`)
|
||||
bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
|
||||
)
|
||||
|
||||
func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
|
||||
@@ -175,6 +162,8 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
|
||||
|
||||
var lineBufferPool bytesutil.ByteBufferPool
|
||||
|
||||
var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
|
||||
|
||||
func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
|
||||
processLogMessage func(timestamp int64, fields []logstorage.Field),
|
||||
) (bool, error) {
|
||||
@@ -225,7 +214,6 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
|
||||
p.RenameField(msgField, "_msg")
|
||||
processLogMessage(ts, p.Fields)
|
||||
logjson.PutParser(p)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -120,10 +120,10 @@ func compressData(s string) string {
|
||||
var bb bytes.Buffer
|
||||
zw := gzip.NewWriter(&bb)
|
||||
if _, err := zw.Write([]byte(s)); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %w", err))
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %s", err))
|
||||
}
|
||||
if err := zw.Close(); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %w", err))
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
|
||||
}
|
||||
return bb.String()
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
|
||||
r.Reset(dataBytes)
|
||||
_, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -3,13 +3,12 @@ package insertutils
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// CommonParams contains common HTTP parameters used by log ingestion APIs.
|
||||
@@ -74,19 +73,12 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
|
||||
// GetProcessLogMessageFunc returns a function, which adds parsed log messages to lr.
|
||||
func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(timestamp int64, fields []logstorage.Field) {
|
||||
return func(timestamp int64, fields []logstorage.Field) {
|
||||
if len(fields) > *MaxFieldsPerLine {
|
||||
rf := logstorage.RowFormatter(fields)
|
||||
logger.Warnf("dropping log line with %d fields; it exceeds -insert.maxFieldsPerLine=%d; %s", len(fields), *MaxFieldsPerLine, rf)
|
||||
rowsDroppedTotalTooManyFields.Inc()
|
||||
return
|
||||
}
|
||||
|
||||
lr.MustAdd(cp.TenantID, timestamp, fields)
|
||||
if cp.Debug {
|
||||
s := lr.GetRowString(0)
|
||||
lr.ResetKeepSettings()
|
||||
logger.Infof("remoteAddr=%s; requestURI=%s; ignoring log entry because of `debug` query arg: %s", cp.DebugRemoteAddr, cp.DebugRequestURI, s)
|
||||
rowsDroppedTotalDebug.Inc()
|
||||
rowsDroppedTotal.Inc()
|
||||
return
|
||||
}
|
||||
if lr.NeedFlush() {
|
||||
@@ -96,5 +88,4 @@ func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(ti
|
||||
}
|
||||
}
|
||||
|
||||
var rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
|
||||
var rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
|
||||
var rowsDroppedTotal = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
package insertutils
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
)
|
||||
|
||||
var (
|
||||
// MaxLineSizeBytes is the maximum length of a single line for /insert/* handlers
|
||||
MaxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 256*1024, "The maximum size of a single line, which can be read by /insert/* handlers")
|
||||
|
||||
// MaxFieldsPerLine is the maximum number of fields per line for /insert/* handlers
|
||||
MaxFieldsPerLine = flag.Int("insert.maxFieldsPerLine", 1000, "The maximum number of log fields per line, which can be read by /insert/* handlers")
|
||||
)
|
||||
|
||||
@@ -21,7 +21,6 @@ import (
|
||||
|
||||
// RequestHandler processes jsonline insert requests
|
||||
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
startTime := time.Now()
|
||||
w.Header().Add("Content-Type", "application/json")
|
||||
|
||||
if r.Method != "POST" {
|
||||
@@ -36,10 +35,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
if err := vlstorage.CanWriteData(); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
||||
processLogMessage := cp.GetProcessLogMessageFunc(lr)
|
||||
|
||||
@@ -82,11 +77,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
|
||||
// update jsonlineRequestDuration only for successfully parsed requests.
|
||||
// There is no need in updating jsonlineRequestDuration for request errors,
|
||||
// since their timings are usually much smaller than the timing for successful request parsing.
|
||||
jsonlineRequestDuration.UpdateDuration(startTime)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -119,7 +109,6 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
|
||||
p.RenameField(msgField, "_msg")
|
||||
processLogMessage(ts, p.Fields)
|
||||
logjson.PutParser(p)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -155,7 +144,6 @@ func parseISO8601Timestamp(s string) (int64, error) {
|
||||
var lineBufferPool bytesutil.ByteBufferPool
|
||||
|
||||
var (
|
||||
requestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
|
||||
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
|
||||
jsonlineRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/jsonline"}`)
|
||||
requestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
|
||||
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
|
||||
)
|
||||
|
||||
@@ -5,31 +5,29 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
lokiRequestsJSONTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
|
||||
lokiRequestsProtobufTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
|
||||
)
|
||||
|
||||
// RequestHandler processes Loki insert requests
|
||||
//
|
||||
// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
|
||||
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
|
||||
switch path {
|
||||
case "/api/v1/push":
|
||||
return handleInsert(r, w)
|
||||
case "/ready":
|
||||
// See https://grafana.com/docs/loki/latest/api/#identify-ready-loki-instance
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("ready"))
|
||||
return true
|
||||
default:
|
||||
if path != "/api/v1/push" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
|
||||
func handleInsert(r *http.Request, w http.ResponseWriter) bool {
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
switch contentType {
|
||||
case "application/json":
|
||||
lokiRequestsJSONTotal.Inc()
|
||||
return handleJSON(r, w)
|
||||
default:
|
||||
// Protobuf request body should be handled by default according to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
|
||||
// Protobuf request body should be handled by default accoring to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
|
||||
lokiRequestsProtobufTotal.Inc()
|
||||
return handleProtobuf(r, w)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,12 @@ import (
|
||||
"github.com/valyala/fastjson"
|
||||
)
|
||||
|
||||
var parserPool fastjson.ParserPool
|
||||
var (
|
||||
rowsIngestedJSONTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
|
||||
parserPool fastjson.ParserPool
|
||||
)
|
||||
|
||||
func handleJSON(r *http.Request, w http.ResponseWriter) bool {
|
||||
startTime := time.Now()
|
||||
lokiRequestsJSONTotal.Inc()
|
||||
reader := r.Body
|
||||
if r.Header.Get("Content-Encoding") == "gzip" {
|
||||
zr, err := common.GetGzipReader(reader)
|
||||
@@ -47,36 +48,19 @@ func handleJSON(r *http.Request, w http.ResponseWriter) bool {
|
||||
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
|
||||
return true
|
||||
}
|
||||
if err := vlstorage.CanWriteData(); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
||||
processLogMessage := cp.GetProcessLogMessageFunc(lr)
|
||||
n, err := parseJSONRequest(data, processLogMessage)
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot parse Loki json request: %s", err)
|
||||
httpserver.Errorf(w, r, "cannot parse Loki request: %s", err)
|
||||
return true
|
||||
}
|
||||
|
||||
rowsIngestedJSONTotal.Add(n)
|
||||
|
||||
// update lokiRequestJSONDuration only for successfully parsed requests
|
||||
// There is no need in updating lokiRequestJSONDuration for request errors,
|
||||
// since their timings are usually much smaller than the timing for successful request parsing.
|
||||
lokiRequestJSONDuration.UpdateDuration(startTime)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
lokiRequestsJSONTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
|
||||
rowsIngestedJSONTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
|
||||
lokiRequestJSONDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="json"}`)
|
||||
)
|
||||
|
||||
func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
|
||||
p := parserPool.Get()
|
||||
defer parserPool.Put(p)
|
||||
@@ -171,6 +155,7 @@ func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, field
|
||||
Value: bytesutil.ToUnsafeString(msg),
|
||||
})
|
||||
processLogMessage(ts, fields)
|
||||
|
||||
}
|
||||
rowsIngested += len(lines)
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ func benchmarkParseJSONRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseJSONRequest(data, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -19,13 +19,12 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
bytesBufPool bytesutil.ByteBufferPool
|
||||
pushReqsPool sync.Pool
|
||||
rowsIngestedProtobufTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
|
||||
bytesBufPool bytesutil.ByteBufferPool
|
||||
pushReqsPool sync.Pool
|
||||
)
|
||||
|
||||
func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
|
||||
startTime := time.Now()
|
||||
lokiRequestsProtobufTotal.Inc()
|
||||
wcr := writeconcurrencylimiter.GetReader(r.Body)
|
||||
data, err := io.ReadAll(wcr)
|
||||
writeconcurrencylimiter.PutReader(wcr)
|
||||
@@ -39,36 +38,19 @@ func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
|
||||
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
|
||||
return true
|
||||
}
|
||||
if err := vlstorage.CanWriteData(); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
||||
processLogMessage := cp.GetProcessLogMessageFunc(lr)
|
||||
n, err := parseProtobufRequest(data, processLogMessage)
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot parse Loki protobuf request: %s", err)
|
||||
httpserver.Errorf(w, r, "cannot parse loki request: %s", err)
|
||||
return true
|
||||
}
|
||||
|
||||
rowsIngestedProtobufTotal.Add(n)
|
||||
|
||||
// update lokiRequestProtobufDuration only for successfully parsed requests
|
||||
// There is no need in updating lokiRequestProtobufDuration for request errors,
|
||||
// since their timings are usually much smaller than the timing for successful request parsing.
|
||||
lokiRequestProtobufDuration.UpdateDuration(startTime)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
lokiRequestsProtobufTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
|
||||
rowsIngestedProtobufTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
|
||||
lokiRequestProtobufDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="protobuf"}`)
|
||||
)
|
||||
|
||||
func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
|
||||
bb := bytesBufPool.Get()
|
||||
defer bytesBufPool.Put(bb)
|
||||
@@ -84,7 +66,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
|
||||
err = req.Unmarshal(bb.B)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse request body: %w", err)
|
||||
return 0, fmt.Errorf("cannot parse request body: %s", err)
|
||||
}
|
||||
|
||||
var commonFields []logstorage.Field
|
||||
@@ -97,7 +79,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
// Labels are same for all entries in the stream.
|
||||
commonFields, err = parsePromLabels(commonFields[:0], stream.Labels)
|
||||
if err != nil {
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %w", stream.Labels, err)
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %s", stream.Labels, err)
|
||||
}
|
||||
fields := commonFields
|
||||
|
||||
|
||||
@@ -6,9 +6,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func BenchmarkParseProtobufRequest(b *testing.B) {
|
||||
@@ -31,7 +30,7 @@ func benchmarkParseProtobufRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseProtobufRequest(body, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -88,12 +88,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/vmui/") {
|
||||
if strings.HasPrefix(path, "/vmui/static/") {
|
||||
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
|
||||
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
|
||||
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
|
||||
w.Header().Set("Cache-Control", "max-age=31536000")
|
||||
}
|
||||
r.URL.Path = path
|
||||
vmuiFileServer.ServeHTTP(w, r)
|
||||
return true
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
{
|
||||
"files": {
|
||||
"main.css": "./static/css/main.d1313636.css",
|
||||
"main.js": "./static/js/main.1919fefe.js",
|
||||
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
|
||||
"main.css": "./static/css/main.5f91b1c5.css",
|
||||
"main.js": "./static/js/main.7226aaff.js",
|
||||
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
|
||||
"static/media/Lato-Regular.ttf": "./static/media/Lato-Regular.d714fec1633b69a9c2e9.ttf",
|
||||
"static/media/Lato-Bold.ttf": "./static/media/Lato-Bold.32360ba4b57802daa4d6.ttf",
|
||||
"index.html": "./index.html"
|
||||
},
|
||||
"entrypoints": [
|
||||
"static/css/main.d1313636.css",
|
||||
"static/js/main.1919fefe.js"
|
||||
"static/css/main.5f91b1c5.css",
|
||||
"static/js/main.7226aaff.js"
|
||||
]
|
||||
}
|
||||
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
@@ -1 +1 @@
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.1919fefe.js"></script><link href="./static/css/main.d1313636.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.7226aaff.js"></script><link href="./static/css/main.5f91b1c5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
1
app/vlselect/vmui/static/css/main.5f91b1c5.css
Normal file
1
app/vlselect/vmui/static/css/main.5f91b1c5.css
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
app/vlselect/vmui/static/js/main.7226aaff.js
Normal file
2
app/vlselect/vmui/static/js/main.7226aaff.js
Normal file
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
|
||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
||||
|
||||
/**
|
||||
* @remix-run/router v1.10.0
|
||||
* @remix-run/router v1.7.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router DOM v6.17.0
|
||||
* React Router DOM v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -29,7 +29,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router v6.17.0
|
||||
* React Router v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -3,17 +3,14 @@ package vlstorage
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -32,8 +29,6 @@ var (
|
||||
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
|
||||
logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
|
||||
"see https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/ ; see also -logNewStreams")
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which "+
|
||||
"the storage stops accepting new data")
|
||||
)
|
||||
|
||||
// Init initializes vlstorage.
|
||||
@@ -44,16 +39,15 @@ func Init() {
|
||||
logger.Panicf("BUG: Init() has been already called")
|
||||
}
|
||||
|
||||
if retentionPeriod.Duration() < 24*time.Hour {
|
||||
if retentionPeriod.Msecs < 24*3600*1000 {
|
||||
logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
|
||||
}
|
||||
cfg := &logstorage.StorageConfig{
|
||||
Retention: retentionPeriod.Duration(),
|
||||
FlushInterval: *inmemoryDataFlushInterval,
|
||||
FutureRetention: futureRetention.Duration(),
|
||||
LogNewStreams: *logNewStreams,
|
||||
LogIngestedRows: *logIngestedRows,
|
||||
MinFreeDiskSpaceBytes: minFreeDiskSpaceBytes.N,
|
||||
Retention: time.Millisecond * time.Duration(retentionPeriod.Msecs),
|
||||
FlushInterval: *inmemoryDataFlushInterval,
|
||||
FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs),
|
||||
LogNewStreams: *logNewStreams,
|
||||
LogIngestedRows: *logIngestedRows,
|
||||
}
|
||||
logger.Infof("opening storage at -storageDataPath=%s", *storageDataPath)
|
||||
startTime := time.Now()
|
||||
@@ -80,21 +74,7 @@ func Stop() {
|
||||
var strg *logstorage.Storage
|
||||
var storageMetrics *metrics.Set
|
||||
|
||||
// CanWriteData returns non-nil error if it cannot write data to vlstorage.
|
||||
func CanWriteData() error {
|
||||
if strg.IsReadOnly() {
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot add rows into storage in read-only mode; the storage can be in read-only mode "+
|
||||
"because of lack of free disk space at -storageDataPath=%s", *storageDataPath),
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MustAddRows adds lr to vlstorage
|
||||
//
|
||||
// It is advised to call CanWriteData() before calling MustAddRows()
|
||||
func MustAddRows(lr *logstorage.LogRows) {
|
||||
strg.MustAddRows(lr)
|
||||
}
|
||||
@@ -127,12 +107,6 @@ func initStorageMetrics(strg *logstorage.Storage) *metrics.Set {
|
||||
ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 {
|
||||
return float64(fs.MustGetFreeSpace(*storageDataPath))
|
||||
})
|
||||
ms.NewGauge(fmt.Sprintf(`vl_storage_is_read_only{path=%q}`, *storageDataPath), func() float64 {
|
||||
if m().IsReadOnly {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
ms.NewGauge(`vl_active_merges{type="inmemory"}`, func() float64 {
|
||||
return float64(m().InmemoryActiveMerges)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -65,9 +65,7 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package datadogv1
|
||||
package datadog
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
@@ -8,32 +8,33 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1/stream"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv1"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv1"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv1"}`)
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []datadogv1.Series) error {
|
||||
return stream.Parse(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompbmarshal.Label) error {
|
||||
func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -62,7 +63,7 @@ func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompbm
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
name, value := parser.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
@@ -87,9 +88,7 @@ func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompbm
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
@@ -1,102 +0,0 @@
|
||||
package datadogv2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv2"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv2"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv2"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v2/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ct := req.Header.Get("Content-Type")
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, ct, func(series []datadogv2.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv2.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
for _, rs := range ss.Resources {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: rs.Type,
|
||||
Value: rs.Name,
|
||||
})
|
||||
}
|
||||
if ss.SourceTypeName != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "source_type_name",
|
||||
Value: ss.SourceTypeName,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Timestamp: pt.Timestamp * 1000,
|
||||
Value: pt.Value,
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
@@ -21,12 +20,10 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, false, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows)
|
||||
})
|
||||
return stream.Parse(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -59,9 +56,7 @@ func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -36,9 +36,9 @@ var (
|
||||
// InsertHandlerForReader processes remote write for influx line protocol.
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, nil)
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -130,9 +130,7 @@ func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prom
|
||||
ctx.ctx.Labels = labels
|
||||
ctx.ctx.Samples = samples
|
||||
ctx.commonLabels = commonLabels
|
||||
if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -11,13 +11,13 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/newrelic"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentelemetry"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
|
||||
@@ -41,14 +41,12 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -125,7 +123,7 @@ func main() {
|
||||
common.StartUnmarshalWorkers()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(nil, r, false)
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
@@ -140,7 +138,7 @@ func main() {
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
|
||||
promscrape.Init(remotewrite.Push)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
@@ -230,7 +228,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"stream-agg", "streaming aggregation status"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"-/reload", "reload configuration"},
|
||||
@@ -314,7 +311,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
influxQueryRequests.Inc()
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "/opentelemetry/api/v1/push":
|
||||
case "/opentelemetry/api/v1/push", "/opentelemetry/v1/metrics":
|
||||
opentelemetryPushRequests.Inc()
|
||||
if err := opentelemetry.InsertHandler(nil, r); err != nil {
|
||||
opentelemetryPushErrors.Inc()
|
||||
@@ -323,44 +320,10 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/newrelic/inventory/deltas":
|
||||
newrelicInventoryRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
|
||||
return true
|
||||
case "/newrelic/infra/v2/metrics/events/bulk":
|
||||
newrelicWriteRequests.Inc()
|
||||
if err := newrelic.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
newrelicWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v1/series":
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -446,9 +409,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/stream-agg":
|
||||
streamaggr.WriteHumanReadableState(w, r, remotewrite.GetAggregators())
|
||||
return true
|
||||
case "/ready":
|
||||
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
|
||||
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
|
||||
@@ -551,7 +511,7 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
influxQueryRequests.Inc()
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "opentelemetry/api/v1/push":
|
||||
case "opentelemetry/api/v1/push", "opentelemetry/v1/metrics":
|
||||
opentelemetryPushRequests.Inc()
|
||||
if err := opentelemetry.InsertHandler(at, r); err != nil {
|
||||
opentelemetryPushErrors.Inc()
|
||||
@@ -560,43 +520,10 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "newrelic/inventory/deltas":
|
||||
newrelicInventoryRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
|
||||
return true
|
||||
case "newrelic/infra/v2/metrics/events/bulk":
|
||||
newrelicWriteRequests.Inc()
|
||||
if err := newrelic.InsertHandlerForHTTP(at, r); err != nil {
|
||||
newrelicWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v1/series":
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -654,25 +581,16 @@ var (
|
||||
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
|
||||
|
||||
datadogv1WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogv2WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
datadogv2WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
|
||||
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
|
||||
datadogIntakeRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/intake", protocol="datadog"}`)
|
||||
datadogMetadataRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/metadata", protocol="datadog"}`)
|
||||
|
||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)
|
||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)
|
||||
|
||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
|
||||
newrelicInventoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/inventory/deltas", protocol="newrelic"}`)
|
||||
newrelicCheckRequest = metrics.NewCounter(`vm_http_requests_total{path="/newrelic", protocol="newrelic"}`)
|
||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
|
||||
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
|
||||
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)
|
||||
|
||||
@@ -84,8 +84,6 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
package newrelic
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="newrelic"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="newrelic"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="newrelic"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for NewRelic POST /infra/v2/metrics/events/bulk request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
isGzip := ce == "gzip"
|
||||
return stream.Parse(req.Body, isGzip, func(rows []newrelic.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
samplesCount := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
tags := r.Tags
|
||||
srcSamples := r.Samples
|
||||
for j := range srcSamples {
|
||||
s := &srcSamples[j]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: bytesutil.ToUnsafeString(s.Name),
|
||||
})
|
||||
for k := range tags {
|
||||
t := &tags[k]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(t.Key),
|
||||
Value: bytesutil.ToUnsafeString(t.Value),
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: s.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
labels = append(labels, extraLabels...)
|
||||
}
|
||||
samplesCount += len(srcSamples)
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
}
|
||||
rowsPerInsert.Update(float64(samplesCount))
|
||||
return nil
|
||||
}
|
||||
@@ -59,9 +59,7 @@ func insertRows(at *auth.Token, tss []prompbmarshal.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -56,9 +56,7 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -64,9 +64,7 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -32,7 +32,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
return err
|
||||
}
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return stream.Parse(req.Body, defaultTimestamp, isGzipped, true, func(rows []parser.Row) error {
|
||||
return stream.Parse(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, func(s string) {
|
||||
httpserver.LogError(req, s)
|
||||
@@ -73,9 +73,7 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
@@ -47,8 +48,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
for i := range ts.Labels {
|
||||
label := &ts.Labels[i]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
Name: bytesutil.ToUnsafeString(label.Name),
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
@@ -68,9 +69,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -58,10 +58,8 @@ var (
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
|
||||
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
|
||||
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
|
||||
@@ -108,15 +106,12 @@ type client struct {
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg, err := authCfg.NewTLSConfig()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize tls config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg := authCfg.NewTLSConfig()
|
||||
awsCfg, err := getAWSAPIConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
DialContext: statDial,
|
||||
@@ -236,16 +231,10 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
@@ -313,7 +302,7 @@ func (c *client) runWorker() {
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
@@ -322,11 +311,11 @@ func (c *client) runWorker() {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -334,42 +323,26 @@ func (c *client) runWorker() {
|
||||
}
|
||||
|
||||
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
req, err := c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req := c.newRequest(url, body)
|
||||
resp, err := c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
if err != nil && errors.Is(err, io.EOF) {
|
||||
// it is likely connection became stale.
|
||||
// So we do one more attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req = c.newRequest(url, body)
|
||||
resp, err = c.hc.Do(req)
|
||||
}
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
return resp, err
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
func (c *client) newRequest(url string, body []byte) *http.Request {
|
||||
reqBody := bytes.NewBuffer(body)
|
||||
req, err := http.NewRequest(http.MethodPost, url, reqBody)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
|
||||
}
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.authCfg.SetHeaders(req, true)
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
@@ -383,10 +356,11 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash := awsapi.HashHex(body)
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
|
||||
}
|
||||
}
|
||||
return req, nil
|
||||
return req
|
||||
}
|
||||
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
|
||||
@@ -37,9 +37,9 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.fq = fq
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
@@ -57,11 +57,10 @@ func (ps *pendingSeries) MustStop() {
|
||||
ps.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
ps.mu.Lock()
|
||||
ok := ps.wr.tryPush(tss)
|
||||
ps.wr.push(tss)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
@@ -71,20 +70,18 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
}
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
mustStop := false
|
||||
for !mustStop {
|
||||
select {
|
||||
case <-ps.stopCh:
|
||||
ps.mu.Lock()
|
||||
ps.wr.mustFlushOnStop()
|
||||
ps.mu.Unlock()
|
||||
return
|
||||
mustStop = true
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ps.mu.Lock()
|
||||
_ = ps.wr.tryFlush()
|
||||
ps.wr.flush()
|
||||
ps.mu.Unlock()
|
||||
}
|
||||
}
|
||||
@@ -93,16 +90,16 @@ type writeRequest struct {
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to fq.
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to fq.
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
@@ -115,7 +112,7 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
@@ -133,40 +130,23 @@ func (wr *writeRequest) reset() {
|
||||
wr.buf = wr.buf[:0]
|
||||
}
|
||||
|
||||
// mustFlushOnStop force pushes wr data into wr.fq
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (wr *writeRequest) mustFlushOnStop() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite) {
|
||||
logger.Panicf("BUG: final flush must always return true")
|
||||
}
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
|
||||
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryFlush() bool {
|
||||
func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite) {
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
wr.reset()
|
||||
return true
|
||||
}
|
||||
|
||||
func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roundDigits int) {
|
||||
if n := significantFigures; n > 0 {
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := roundDigits; n < 100 {
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
@@ -174,27 +154,21 @@ func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roun
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryPush(src []prompbmarshal.TimeSeries) bool {
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
tssDst := wr.tss
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
|
||||
wr.tss = tssDst
|
||||
if !wr.tryFlush() {
|
||||
return false
|
||||
}
|
||||
wr.flush()
|
||||
tssDst = wr.tss
|
||||
}
|
||||
tsSrc := &src[i]
|
||||
adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
|
||||
}
|
||||
|
||||
wr.tss = tssDst
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
@@ -222,13 +196,13 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return true
|
||||
return
|
||||
}
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = wr.MarshalProtobuf(bb.B[:0])
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := snappyBufPool.Get()
|
||||
if isVMRemoteWrite {
|
||||
@@ -238,13 +212,11 @@ func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
if !tryPushBlock(zb.B) {
|
||||
return false
|
||||
}
|
||||
pushBlock(zb.B)
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockSizeBytes.Update(float64(len(zb.B)))
|
||||
snappyBufPool.Put(zb)
|
||||
return true
|
||||
return
|
||||
}
|
||||
snappyBufPool.Put(zb)
|
||||
} else {
|
||||
@@ -257,36 +229,23 @@ func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block
|
||||
samples := wr.Timeseries[0].Samples
|
||||
if len(samples) == 1 {
|
||||
logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
|
||||
return true
|
||||
return
|
||||
}
|
||||
n := len(samples) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return true
|
||||
return
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries[n:]
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
|
||||
@@ -26,16 +26,13 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) bool {
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
return true
|
||||
}
|
||||
if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
|
||||
t.Fatalf("cannot push data to to remote storage")
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
@@ -43,7 +40,7 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 3)
|
||||
f(false, expectedBlockLenProm, 0)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/klauspost/compress/s2"
|
||||
)
|
||||
@@ -21,7 +22,7 @@ func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []by
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(rowsCount))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
|
||||
@@ -3,7 +3,6 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
@@ -93,7 +92,6 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
rctx.reset()
|
||||
tssDst := tss[:0]
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
@@ -122,7 +120,6 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
if len(extraLabels) == 0 {
|
||||
return
|
||||
}
|
||||
rctx.reset()
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
@@ -142,34 +139,6 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
|
||||
rctx.reset()
|
||||
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
|
||||
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
for _, label := range ts.Labels {
|
||||
labelName := label.Name
|
||||
if labelName == "vm_account_id" || labelName == "vm_project_id" {
|
||||
continue
|
||||
}
|
||||
labels = append(labels, label)
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_account_id",
|
||||
Value: accountIDStr,
|
||||
})
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_project_id",
|
||||
Value: projectIDStr,
|
||||
})
|
||||
ts.Labels = labels[labelsLen:]
|
||||
}
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
type relabelCtx struct {
|
||||
// pool for labels, which are used during the relabeling.
|
||||
labels []prompbmarshal.Label
|
||||
@@ -191,7 +160,7 @@ func getRelabelCtx() *relabelCtx {
|
||||
}
|
||||
|
||||
func putRelabelCtx(rctx *relabelCtx) {
|
||||
rctx.reset()
|
||||
rctx.labels = rctx.labels[:0]
|
||||
relabelCtxPool.Put(rctx)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -11,8 +10,6 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -26,7 +23,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -37,22 +33,15 @@ var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
|
||||
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
|
||||
"See also -remoteWrite.multitenantURL")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
|
||||
"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
|
||||
enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
|
||||
"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
|
||||
"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
|
||||
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
@@ -91,11 +80,6 @@ var (
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
|
||||
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
|
||||
disableOnDiskQueue = flag.Bool("remoteWrite.disableOnDiskQueue", false, "Whether to disable storing pending data to -remoteWrite.tmpDataPath "+
|
||||
"when the configured remote storage systems cannot keep up with the data ingestion rate. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence ."+
|
||||
"See also -remoteWrite.dropSamplesOnOverload")
|
||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||
"cannot be pushed into the configured remote storage systems in a timely manner. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -108,19 +92,11 @@ var (
|
||||
|
||||
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
|
||||
defaultAuthToken = &auth.Token{}
|
||||
|
||||
// ErrQueueFullHTTPRetry must be returned when TryPush() returns false.
|
||||
ErrQueueFullHTTPRetry = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("remote storage systems cannot keep up with the data ingestion rate; retry the request later " +
|
||||
"or remove -remoteWrite.disableOnDiskQueue from vmagent command-line flags, so it could save pending data to -remoteWrite.tmpDataPath; " +
|
||||
"see https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence"),
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
}
|
||||
)
|
||||
|
||||
// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
|
||||
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
|
||||
func MultitenancyEnabled() bool {
|
||||
return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
|
||||
return len(*remoteWriteMultitenantURLs) > 0
|
||||
}
|
||||
|
||||
// Contains the current relabelConfigs.
|
||||
@@ -140,8 +116,6 @@ func InitSecretFlags() {
|
||||
}
|
||||
}
|
||||
|
||||
var shardByURLLabelsMap map[string]struct{}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
@@ -178,13 +152,6 @@ func Init() {
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
if len(*shardByURLLabels) > 0 {
|
||||
m := make(map[string]struct{}, len(*shardByURLLabels))
|
||||
for _, label := range *shardByURLLabels {
|
||||
m[label] = struct{}{}
|
||||
}
|
||||
shardByURLLabelsMap = m
|
||||
}
|
||||
initLabelsGlobal()
|
||||
|
||||
// Register SIGHUP handler for config reload before loadRelabelConfigs.
|
||||
@@ -276,7 +243,7 @@ func reloadRelabelConfigs() {
|
||||
var (
|
||||
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
relabelConfigSuccess = metrics.NewGauge(`vmagent_relabel_config_last_reload_successful`, nil)
|
||||
relabelConfigSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -338,7 +305,7 @@ var configReloaderWG sync.WaitGroup
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls TryPush during and after the call to this func.
|
||||
// It is expected that nobody calls Push during and after the call to this func.
|
||||
func Stop() {
|
||||
close(configReloaderStopCh)
|
||||
configReloaderWG.Wait()
|
||||
@@ -348,7 +315,7 @@ func Stop() {
|
||||
}
|
||||
rwctxsDefault = nil
|
||||
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call TryPush during the Stop call.
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call Push during the Stop call.
|
||||
for _, rwctxs := range rwctxsMap {
|
||||
for _, rwctx := range rwctxs {
|
||||
rwctx.MustStop()
|
||||
@@ -364,47 +331,24 @@ func Stop() {
|
||||
}
|
||||
}
|
||||
|
||||
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
|
||||
// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
|
||||
//
|
||||
// PushDropSamplesOnFailure can modify wr contents.
|
||||
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
_ = tryPush(at, wr, true)
|
||||
}
|
||||
|
||||
// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// TryPush can modify wr contents, so the caller must re-initialize wr before calling TryPush() after unsuccessful attempt.
|
||||
// TryPush may send partial data from wr on unsuccessful attempt, so repeated call for the same wr may send the data multiple times.
|
||||
//
|
||||
// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
|
||||
func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
||||
return tryPush(at, wr, *dropSamplesOnOverload)
|
||||
}
|
||||
|
||||
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
|
||||
tss := wr.Timeseries
|
||||
|
||||
if at == nil && MultitenancyEnabled() {
|
||||
// Write data to default tenant if at isn't set when multitenancy is enabled.
|
||||
// Note that wr may be modified by Push because of relabeling and rounding.
|
||||
func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
|
||||
at = defaultAuthToken
|
||||
}
|
||||
|
||||
var tenantRctx *relabelCtx
|
||||
var rwctxs []*remoteWriteCtx
|
||||
if at == nil {
|
||||
rwctxs = rwctxsDefault
|
||||
} else if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
// Convert at to (vm_account_id, vm_project_id) labels.
|
||||
tenantRctx = getRelabelCtx()
|
||||
defer putRelabelCtx(tenantRctx)
|
||||
rwctxs = rwctxsDefault
|
||||
} else {
|
||||
if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
|
||||
}
|
||||
rwctxsMapLock.Lock()
|
||||
tenantID := tenantmetrics.TenantID{
|
||||
AccountID: at.AccountID,
|
||||
@@ -418,37 +362,18 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
|
||||
rwctxsMapLock.Unlock()
|
||||
}
|
||||
|
||||
rowsCount := getRowsCount(tss)
|
||||
|
||||
if *disableOnDiskQueue {
|
||||
// Quick check whether writes to configured remote storage systems are blocked.
|
||||
// This allows saving CPU time spent on relabeling and block compression
|
||||
// if some of remote storage systems cannot keep up with the data ingestion rate.
|
||||
for _, rwctx := range rwctxs {
|
||||
if rwctx.fq.IsWriteBlocked() {
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
// Just drop samples
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcsGlobal := rcs.global
|
||||
if pcsGlobal.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
defer putRelabelCtx(rctx)
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
rowsCount := getRowsCount(tss)
|
||||
globalRowsPushedBeforeRelabel.Add(rowsCount)
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
samplesCount := 0
|
||||
@@ -456,7 +381,7 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
|
||||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
labelsCount += len(tss[i].Samples) * len(tss[i].Labels)
|
||||
labelsCount += len(tss[i].Labels)
|
||||
i++
|
||||
if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
|
||||
break
|
||||
@@ -469,9 +394,6 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
|
||||
} else {
|
||||
tss = nil
|
||||
}
|
||||
if tenantRctx != nil {
|
||||
tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
|
||||
}
|
||||
if rctx != nil {
|
||||
rowsCountBeforeRelabel := getRowsCount(tssBlock)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
|
||||
@@ -480,35 +402,25 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
|
||||
}
|
||||
sortLabelsIfNeeded(tssBlock)
|
||||
tssBlock = limitSeriesCardinality(tssBlock)
|
||||
if !tryPushBlockToRemoteStorages(rwctxs, tssBlock) {
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
pushBlockToRemoteStorages(rwctxs, tssBlock)
|
||||
if rctx != nil {
|
||||
rctx.reset()
|
||||
}
|
||||
}
|
||||
return true
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
samplesDropped = metrics.NewCounter(`vmagent_remotewrite_samples_dropped_total`)
|
||||
pushFailures = metrics.NewCounter(`vmagent_remotewrite_push_failures_total`)
|
||||
)
|
||||
|
||||
func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) bool {
|
||||
func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
|
||||
if len(tssBlock) == 0 {
|
||||
// Nothing to push
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
if len(rwctxs) == 1 {
|
||||
// Fast path - just push data to the configured single remote storage
|
||||
return rwctxs[0].TryPush(tssBlock)
|
||||
rwctxs[0].Push(tssBlock)
|
||||
return
|
||||
}
|
||||
|
||||
// We need to push tssBlock to multiple remote storages.
|
||||
@@ -516,42 +428,27 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
|
||||
if *shardByURL {
|
||||
// Shard the data among rwctxs
|
||||
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
|
||||
tmpLabels := promutils.GetLabels()
|
||||
for _, ts := range tssBlock {
|
||||
hashLabels := ts.Labels
|
||||
if len(shardByURLLabelsMap) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLLabelsMap[label.Name]; ok {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
h := getLabelsHash(hashLabels)
|
||||
h := getLabelsHash(ts.Labels)
|
||||
idx := h % uint64(len(tssByURL))
|
||||
tssByURL[idx] = append(tssByURL[idx], ts)
|
||||
}
|
||||
promutils.PutLabels(tmpLabels)
|
||||
|
||||
// Push sharded data to remote storages in parallel in order to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for i, rwctx := range rwctxs {
|
||||
tssShard := tssByURL[i]
|
||||
if len(tssShard) == 0 {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(rwctx *remoteWriteCtx, tss []prompbmarshal.TimeSeries) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPush(tss) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
rwctx.Push(tss)
|
||||
}(rwctx, tssShard)
|
||||
}
|
||||
wg.Wait()
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
return
|
||||
}
|
||||
|
||||
// Replicate data among rwctxs.
|
||||
@@ -559,17 +456,13 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for _, rwctx := range rwctxs {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPush(tssBlock) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
rwctx.Push(tssBlock)
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
}
|
||||
|
||||
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
|
||||
@@ -689,19 +582,13 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, *disableOnDiskQueue)
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
if fq.IsWriteBlocked() {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
@@ -723,7 +610,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq, c.useVMProto, sf, rd)
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
@@ -740,7 +627,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
|
||||
}
|
||||
@@ -776,7 +663,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
@@ -814,9 +701,7 @@ func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
}
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
|
||||
// Try pushing the data to remote storage
|
||||
ok := rwctx.tryPushInternal(tss)
|
||||
rwctx.pushInternal(tss)
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
@@ -824,8 +709,6 @@ func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
@@ -845,21 +728,7 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
|
||||
return dst
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompbmarshal.TimeSeries) {
|
||||
if rwctx.tryPushInternal(tss) {
|
||||
return
|
||||
}
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushInternal must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if *dropSamplesOnOverload {
|
||||
rowsCount := getRowsCount(tss)
|
||||
samplesDropped.Add(rowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) bool {
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
if len(labelsGlobal) > 0 {
|
||||
@@ -873,16 +742,13 @@ func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) boo
|
||||
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
|
||||
ok := pss[idx].TryPush(tss)
|
||||
pss[idx].Push(tss)
|
||||
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
@@ -895,7 +761,7 @@ func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
|
||||
@@ -946,24 +812,3 @@ func CheckStreamAggrConfigs() error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAggregators returns aggregators for all the configured remote writes.
|
||||
func GetAggregators() map[string]*streamaggr.Aggregators {
|
||||
var result = map[string]*streamaggr.Aggregators{}
|
||||
|
||||
if len(*remoteWriteMultitenantURLs) > 0 {
|
||||
rwctxsMapLock.Lock()
|
||||
for tenant, rwctxs := range rwctxsMap {
|
||||
for rwNum, rw := range rwctxs {
|
||||
result[fmt.Sprintf("rw %d for tenant %v:%v", rwNum, tenant.AccountID, tenant.ProjectID)] = rw.sas.Load()
|
||||
}
|
||||
}
|
||||
rwctxsMapLock.Unlock()
|
||||
} else {
|
||||
for rwNum, rw := range rwctxsDefault {
|
||||
result[fmt.Sprintf("remote write %d", rwNum)] = rw.sas.Load()
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -76,9 +76,7 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
vmalert-tool:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-local
|
||||
|
||||
vmalert-tool-race:
|
||||
APP_NAME=vmalert-tool RACE=-race $(MAKE) app-local
|
||||
|
||||
vmalert-tool-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker
|
||||
|
||||
vmalert-tool-pure-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-pure
|
||||
|
||||
vmalert-tool-linux-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-amd64
|
||||
|
||||
vmalert-tool-linux-arm-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm
|
||||
|
||||
vmalert-tool-linux-arm64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm64
|
||||
|
||||
vmalert-tool-linux-ppc64le-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-ppc64le
|
||||
|
||||
vmalert-tool-linux-386-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmalert-tool-darwin-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
vmalert-tool-darwin-arm64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-arm64
|
||||
|
||||
vmalert-tool-freebsd-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-freebsd-amd64
|
||||
|
||||
vmalert-tool-openbsd-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-openbsd-amd64
|
||||
|
||||
vmalert-tool-windows-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
package-vmalert-tool:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker
|
||||
|
||||
package-vmalert-tool-pure:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-pure
|
||||
|
||||
package-vmalert-tool-amd64:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-amd64
|
||||
|
||||
package-vmalert-tool-arm:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm
|
||||
|
||||
package-vmalert-tool-arm64:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm64
|
||||
|
||||
package-vmalert-tool-ppc64le:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-ppc64le
|
||||
|
||||
package-vmalert-tool-386:
|
||||
APP_NAME=vmalert-tool $(MAKE) package-via-docker-386
|
||||
|
||||
publish-vmalert-tool:
|
||||
APP_NAME=vmalert-tool $(MAKE) publish-via-docker
|
||||
|
||||
vmalert-tool-linux-amd64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-linux-arm:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-linux-arm64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-linux-ppc64le:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-linux-s390x:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-linux-386:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-darwin-amd64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-darwin-arm64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-freebsd-amd64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-openbsd-amd64:
|
||||
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-tool-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vmalert-tool $(MAKE) app-local-windows-goarch
|
||||
|
||||
vmalert-tool-pure:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-local-pure
|
||||
@@ -1,3 +0,0 @@
|
||||
See vmalert-tool docs [here](https://docs.victoriametrics.com/vmalert-tool.html).
|
||||
|
||||
vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmalert-tool.md).
|
||||
@@ -1,54 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert-tool/unittest"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
)
|
||||
|
||||
func main() {
|
||||
start := time.Now()
|
||||
app := &cli.App{
|
||||
Name: "vmalert-tool",
|
||||
Usage: "VMAlert command-line tool",
|
||||
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html",
|
||||
Version: buildinfo.Version,
|
||||
Commands: []*cli.Command{
|
||||
{
|
||||
Name: "unittest",
|
||||
Usage: "Run unittest for alerting and recording rules.",
|
||||
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "files",
|
||||
Usage: "files to run unittest with. Supports an array of values separated by comma or specified via multiple flags.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "disableAlertgroupLabel",
|
||||
Usage: "disable adding group's Name as label to generated alerts and time series.",
|
||||
Required: false,
|
||||
},
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
if failed := unittest.UnitTest(c.StringSlice("files"), c.Bool("disableAlertgroupLabel")); failed {
|
||||
return fmt.Errorf("unittest failed")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := app.Run(os.Args)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmalert-tool-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmalert-tool-linux-${TARGETARCH}-prod ./vmalert-tool-prod
|
||||
@@ -1,19 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
// alertTestCase holds alert_rule_test cases defined in test file
|
||||
type alertTestCase struct {
|
||||
EvalTime *promutils.Duration `yaml:"eval_time"`
|
||||
GroupName string `yaml:"groupname"`
|
||||
Alertname string `yaml:"alertname"`
|
||||
ExpAlerts []expAlert `yaml:"exp_alerts"`
|
||||
}
|
||||
|
||||
// expAlert holds exp_alerts defined in test file
|
||||
type expAlert struct {
|
||||
ExpLabels map[string]string `yaml:"exp_labels"`
|
||||
ExpAnnotations map[string]string `yaml:"exp_annotations"`
|
||||
}
|
||||
@@ -1,182 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
testutil "github.com/VictoriaMetrics/VictoriaMetrics/app/victoria-metrics/test"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// series holds input_series defined in the test file
|
||||
type series struct {
|
||||
Series string `yaml:"series"`
|
||||
Values string `yaml:"values"`
|
||||
}
|
||||
|
||||
// sequenceValue is an omittable value in a sequence of time series values.
|
||||
type sequenceValue struct {
|
||||
Value float64
|
||||
Omitted bool
|
||||
}
|
||||
|
||||
func httpWrite(address string, r io.Reader) {
|
||||
resp, err := http.Post(address, "", r)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to send to storage: %v", err)
|
||||
}
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
// writeInputSeries send input series to vmstorage and flush them
|
||||
func writeInputSeries(input []series, interval *promutils.Duration, startStamp time.Time, dst string) error {
|
||||
r := testutil.WriteRequest{}
|
||||
for _, data := range input {
|
||||
expr, err := metricsql.Parse(data.Series)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse series %s: %v", data.Series, err)
|
||||
}
|
||||
promvals, err := parseInputValue(data.Values, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse input series value %s: %v", data.Values, err)
|
||||
}
|
||||
metricExpr, ok := expr.(*metricsql.MetricExpr)
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to parse series %s to metric expr: %v", data.Series, err)
|
||||
}
|
||||
samples := make([]testutil.Sample, 0, len(promvals))
|
||||
ts := startStamp
|
||||
for _, v := range promvals {
|
||||
if !v.Omitted {
|
||||
samples = append(samples, testutil.Sample{
|
||||
Timestamp: ts.UnixMilli(),
|
||||
Value: v.Value,
|
||||
})
|
||||
}
|
||||
ts = ts.Add(interval.Duration())
|
||||
}
|
||||
var ls []testutil.Label
|
||||
for _, filter := range metricExpr.LabelFilterss[0] {
|
||||
ls = append(ls, testutil.Label{Name: filter.Label, Value: filter.Value})
|
||||
}
|
||||
r.Timeseries = append(r.Timeseries, testutil.TimeSeries{Labels: ls, Samples: samples})
|
||||
}
|
||||
|
||||
data, err := testutil.Compress(r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to compress data: %v", err)
|
||||
}
|
||||
// write input series to vm
|
||||
httpWrite(dst, bytes.NewBuffer(data))
|
||||
vmstorage.Storage.DebugFlush()
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseInputValue support input like "1", "1+1x1 _ -4 3+20x1", see more examples in test.
|
||||
func parseInputValue(input string, origin bool) ([]sequenceValue, error) {
|
||||
var res []sequenceValue
|
||||
items := strings.Split(input, " ")
|
||||
reg := regexp.MustCompile(`\D?\d*\D?`)
|
||||
for _, item := range items {
|
||||
if item == "stale" {
|
||||
res = append(res, sequenceValue{Value: decimal.StaleNaN})
|
||||
continue
|
||||
}
|
||||
vals := reg.FindAllString(item, -1)
|
||||
switch len(vals) {
|
||||
case 1:
|
||||
if vals[0] == "_" {
|
||||
res = append(res, sequenceValue{Omitted: true})
|
||||
continue
|
||||
}
|
||||
v, err := strconv.ParseFloat(vals[0], 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res = append(res, sequenceValue{Value: v})
|
||||
continue
|
||||
case 2:
|
||||
p1 := vals[0][:len(vals[0])-1]
|
||||
v2, err := strconv.ParseInt(vals[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
option := vals[0][len(vals[0])-1]
|
||||
switch option {
|
||||
case '+':
|
||||
v1, err := strconv.ParseFloat(p1, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res = append(res, sequenceValue{Value: v1 + float64(v2)})
|
||||
case 'x':
|
||||
for i := int64(0); i <= v2; i++ {
|
||||
if p1 == "_" {
|
||||
if i == 0 {
|
||||
i = 1
|
||||
}
|
||||
res = append(res, sequenceValue{Omitted: true})
|
||||
continue
|
||||
}
|
||||
v1, err := strconv.ParseFloat(p1, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !origin || v1 == 0 {
|
||||
res = append(res, sequenceValue{Value: v1 * float64(i)})
|
||||
continue
|
||||
}
|
||||
newVal := fmt.Sprintf("%s+0x%s", p1, vals[1])
|
||||
newRes, err := parseInputValue(newVal, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res = append(res, newRes...)
|
||||
break
|
||||
}
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("got invalid operation %b", option)
|
||||
}
|
||||
case 3:
|
||||
r1, err := parseInputValue(fmt.Sprintf("%s%s", vals[1], vals[2]), false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p1 := vals[0][:len(vals[0])-1]
|
||||
v1, err := strconv.ParseFloat(p1, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
option := vals[0][len(vals[0])-1]
|
||||
var isAdd bool
|
||||
if option == '+' {
|
||||
isAdd = true
|
||||
}
|
||||
for _, r := range r1 {
|
||||
if isAdd {
|
||||
res = append(res, sequenceValue{
|
||||
Value: r.Value + v1,
|
||||
})
|
||||
} else {
|
||||
res = append(res, sequenceValue{
|
||||
Value: v1 - r.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported input %s", input)
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
)
|
||||
|
||||
func TestParseInputValue(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input string
|
||||
exp []sequenceValue
|
||||
failed bool
|
||||
}{
|
||||
{
|
||||
"",
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"testfailed",
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
// stale doesn't support operations
|
||||
{
|
||||
"stalex3",
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"-4",
|
||||
[]sequenceValue{{Value: -4}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"_",
|
||||
[]sequenceValue{{Omitted: true}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"stale",
|
||||
[]sequenceValue{{Value: decimal.StaleNaN}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"-4x1",
|
||||
[]sequenceValue{{Value: -4}, {Value: -4}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"_x1",
|
||||
[]sequenceValue{{Omitted: true}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"1+1x4",
|
||||
[]sequenceValue{{Value: 1}, {Value: 2}, {Value: 3}, {Value: 4}, {Value: 5}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"2-1x4",
|
||||
[]sequenceValue{{Value: 2}, {Value: 1}, {Value: 0}, {Value: -1}, {Value: -2}},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"1+1x1 _ -4 stale 3+20x1",
|
||||
[]sequenceValue{{Value: 1}, {Value: 2}, {Omitted: true}, {Value: -4}, {Value: decimal.StaleNaN}, {Value: 3}, {Value: 23}},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
output, err := parseInputValue(tc.input, true)
|
||||
if err != nil != tc.failed {
|
||||
t.Fatalf("failed to parse %s, expect %t, got %t", tc.input, tc.failed, err != nil)
|
||||
}
|
||||
if len(tc.exp) != len(output) {
|
||||
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||
}
|
||||
for i := 0; i < len(tc.exp); i++ {
|
||||
if tc.exp[i].Omitted != output[i].Omitted {
|
||||
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||
}
|
||||
if tc.exp[i].Value != output[i].Value {
|
||||
if decimal.IsStaleNaN(tc.exp[i].Value) && decimal.IsStaleNaN(output[i].Value) {
|
||||
continue
|
||||
}
|
||||
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// metricsqlTestCase holds metricsql_expr_test cases defined in test file
|
||||
type metricsqlTestCase struct {
|
||||
Expr string `yaml:"expr"`
|
||||
EvalTime *promutils.Duration `yaml:"eval_time"`
|
||||
ExpSamples []expSample `yaml:"exp_samples"`
|
||||
}
|
||||
|
||||
type expSample struct {
|
||||
Labels string `yaml:"labels"`
|
||||
Value float64 `yaml:"value"`
|
||||
}
|
||||
|
||||
// checkMetricsqlCase will check metricsql_expr_test cases
|
||||
func checkMetricsqlCase(cases []metricsqlTestCase, q datasource.QuerierBuilder) (checkErrs []error) {
|
||||
queries := q.BuildWithParams(datasource.QuerierParams{QueryParams: url.Values{"nocache": {"1"}, "latency_offset": {"1ms"}}, DataSourceType: "prometheus"})
|
||||
Outer:
|
||||
for _, mt := range cases {
|
||||
result, _, err := queries.Query(context.Background(), mt.Expr, durationToTime(mt.EvalTime))
|
||||
if err != nil {
|
||||
checkErrs = append(checkErrs, fmt.Errorf(" expr: %q, time: %s, err: %w", mt.Expr,
|
||||
mt.EvalTime.Duration().String(), err))
|
||||
continue
|
||||
}
|
||||
var gotSamples []parsedSample
|
||||
for _, s := range result.Data {
|
||||
sort.Slice(s.Labels, func(i, j int) bool {
|
||||
return s.Labels[i].Name < s.Labels[j].Name
|
||||
})
|
||||
gotSamples = append(gotSamples, parsedSample{
|
||||
Labels: s.Labels,
|
||||
Value: s.Values[0],
|
||||
})
|
||||
}
|
||||
var expSamples []parsedSample
|
||||
for _, s := range mt.ExpSamples {
|
||||
expLb := datasource.Labels{}
|
||||
if s.Labels != "" {
|
||||
metricsqlExpr, err := metricsql.Parse(s.Labels)
|
||||
if err != nil {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
|
||||
mt.EvalTime.Duration().String(), fmt.Errorf("failed to parse labels %q: %w", s.Labels, err)))
|
||||
continue Outer
|
||||
}
|
||||
metricsqlMetricExpr, ok := metricsqlExpr.(*metricsql.MetricExpr)
|
||||
if !ok {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
|
||||
mt.EvalTime.Duration().String(), fmt.Errorf("got unsupported metricsql type")))
|
||||
continue Outer
|
||||
}
|
||||
for _, l := range metricsqlMetricExpr.LabelFilterss[0] {
|
||||
expLb = append(expLb, datasource.Label{
|
||||
Name: l.Label,
|
||||
Value: l.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
sort.Slice(expLb, func(i, j int) bool {
|
||||
return expLb[i].Name < expLb[j].Name
|
||||
})
|
||||
expSamples = append(expSamples, parsedSample{
|
||||
Labels: expLb,
|
||||
Value: s.Value,
|
||||
})
|
||||
}
|
||||
sort.Slice(expSamples, func(i, j int) bool {
|
||||
return datasource.LabelCompare(expSamples[i].Labels, expSamples[j].Labels) <= 0
|
||||
})
|
||||
sort.Slice(gotSamples, func(i, j int) bool {
|
||||
return datasource.LabelCompare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
|
||||
})
|
||||
if !reflect.DeepEqual(expSamples, gotSamples) {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s,\n exp: %v\n got: %v", mt.Expr,
|
||||
mt.EvalTime.Duration().String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
|
||||
}
|
||||
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func durationToTime(pd *promutils.Duration) time.Time {
|
||||
if pd == nil {
|
||||
return time.Time{}
|
||||
}
|
||||
return time.UnixMilli(pd.Duration().Milliseconds())
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'up{job="vmagent2", instance="localhost:9090"}'
|
||||
values: "0+0x1440"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: suquery_interval_test
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
|
||||
value: 1
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 2h
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: vmagent2
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
datacenter: dc-123
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
|
||||
|
||||
- eval_time: 0
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
datacenter: dc-123
|
||||
|
||||
- eval_time: 0
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
external_labels:
|
||||
datacenter: dc-123
|
||||
@@ -1,49 +0,0 @@
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
name: "Failing test"
|
||||
input_series:
|
||||
- series: test
|
||||
values: "0"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: test
|
||||
eval_time: 0m
|
||||
exp_samples:
|
||||
- value: 0
|
||||
labels: test
|
||||
|
||||
# will failed cause there is no "Test" group and rule defined
|
||||
alert_rule_test:
|
||||
- eval_time: 0m
|
||||
groupname: Test
|
||||
alertname: Test
|
||||
exp_alerts:
|
||||
- exp_labels: {}
|
||||
|
||||
- interval: 1m
|
||||
name: Failing alert test
|
||||
input_series:
|
||||
- series: 'up{job="test"}'
|
||||
values: 0x10
|
||||
|
||||
alert_rule_test:
|
||||
# will failed cause rule is firing
|
||||
- eval_time: 5m
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
- interval: 1m
|
||||
name: Failing alert test with missing groupname
|
||||
input_series:
|
||||
- series: 'up{job="test"}'
|
||||
values: 0x10
|
||||
|
||||
alert_rule_test:
|
||||
# will failed cause missing groupname
|
||||
- eval_time: 5m
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts: []
|
||||
@@ -1,30 +0,0 @@
|
||||
# can be executed successfully but will take more than 1 minute
|
||||
# not included in unit test now
|
||||
evaluation_interval: 100d
|
||||
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
tests:
|
||||
- interval: 1d
|
||||
input_series:
|
||||
- series: test
|
||||
# Max time in time.Duration is 106751d from 1970 (2^63/10^9), i.e. 2262.
|
||||
# But VictoriaMetrics supports maxTimestamp value +2 days from now. see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/827.
|
||||
# We input series to 2024-01-01T00:00:00 here.
|
||||
values: "0+1x19723"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: timestamp(test)
|
||||
eval_time: 0m
|
||||
exp_samples:
|
||||
- value: 0
|
||||
- expr: test
|
||||
eval_time: 100d
|
||||
exp_samples:
|
||||
- labels: test
|
||||
value: 100
|
||||
- expr: timestamp(test)
|
||||
eval_time: 19000d
|
||||
exp_samples:
|
||||
- value: 1641600000 # 19000d -> seconds.
|
||||
39
app/vmalert-tool/unittest/testdata/rules.yaml
vendored
39
app/vmalert-tool/unittest/testdata/rules.yaml
vendored
@@ -1,39 +0,0 @@
|
||||
groups:
|
||||
- name: group1
|
||||
rules:
|
||||
- alert: InstanceDown
|
||||
expr: up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Instance {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||
- alert: AlwaysFiring
|
||||
expr: 1
|
||||
- alert: SameAlertNameWithDifferentGroup
|
||||
expr: absent(test)
|
||||
for: 1m
|
||||
|
||||
- name: group2
|
||||
rules:
|
||||
- record: t1
|
||||
expr: test
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
||||
- record: suquery_interval_test
|
||||
expr: count_over_time(up[5m:])
|
||||
|
||||
- alert: SameAlertNameWithDifferentGroup
|
||||
expr: absent(test)
|
||||
for: 5m
|
||||
|
||||
- name: group3
|
||||
rules:
|
||||
- record: t2
|
||||
expr: t1
|
||||
|
||||
- name: group4
|
||||
rules:
|
||||
- record: t3
|
||||
expr: t1
|
||||
104
app/vmalert-tool/unittest/testdata/test1.yaml
vendored
104
app/vmalert-tool/unittest/testdata/test1.yaml
vendored
@@ -1,104 +0,0 @@
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
group_eval_order: ["group4", "group2", "group3"]
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
name: "basic test"
|
||||
input_series:
|
||||
- series: "test"
|
||||
values: "_x5 1x5 _ stale"
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 1m
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts:
|
||||
- {}
|
||||
- eval_time: 1m
|
||||
groupname: group2
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts: []
|
||||
- eval_time: 150s
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts:
|
||||
- {}
|
||||
- eval_time: 6m
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts: []
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: test
|
||||
eval_time: 11m
|
||||
exp_samples:
|
||||
- labels: '{__name__="test"}'
|
||||
value: 1
|
||||
- expr: test
|
||||
eval_time: 12m
|
||||
exp_samples: []
|
||||
|
||||
- interval: 1m
|
||||
name: "basic test2"
|
||||
input_series:
|
||||
- series: 'up{job="vmagent1", instance="localhost:9090"}'
|
||||
values: "0+0x1440"
|
||||
- series: "test"
|
||||
values: "0+1x1440"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: count(ALERTS) by (alertgroup, alertname, alertstate)
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- labels: '{alertgroup="group1", alertname="AlwaysFiring", alertstate="firing"}'
|
||||
value: 1
|
||||
- labels: '{alertgroup="group1", alertname="InstanceDown", alertstate="pending"}'
|
||||
value: 1
|
||||
- expr: t1
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- value: 4
|
||||
labels: '{__name__="t1", datacenter="dc-123"}'
|
||||
- expr: t2
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- value: 4
|
||||
labels: '{__name__="t2", datacenter="dc-123"}'
|
||||
- expr: t3
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
# t3 is 3 instead of 4 cause it's rules3 is evaluated before rules1
|
||||
- value: 3
|
||||
labels: '{__name__="t3", datacenter="dc-123"}'
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 10m
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: vmagent1
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
datacenter: dc-123
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job vmagent1 has been down for more than 5 minutes."
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
datacenter: dc-123
|
||||
|
||||
- eval_time: 0
|
||||
groupname: alerts
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
external_labels:
|
||||
datacenter: dc-123
|
||||
46
app/vmalert-tool/unittest/testdata/test2.yaml
vendored
46
app/vmalert-tool/unittest/testdata/test2.yaml
vendored
@@ -1,46 +0,0 @@
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'up{job="vmagent2", instance="localhost:9090"}'
|
||||
values: "0+0x1440"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: suquery_interval_test
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
|
||||
value: 1
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 2h
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: vmagent2
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
datacenter: dc-123
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
datacenter: dc-123
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
external_labels:
|
||||
datacenter: dc-123
|
||||
@@ -1,83 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
// parsedSample is a sample with parsed Labels
|
||||
type parsedSample struct {
|
||||
Labels datasource.Labels
|
||||
Value float64
|
||||
}
|
||||
|
||||
func (ps *parsedSample) String() string {
|
||||
return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
|
||||
}
|
||||
|
||||
func parsedSamplesString(pss []parsedSample) string {
|
||||
if len(pss) == 0 {
|
||||
return "nil"
|
||||
}
|
||||
s := pss[0].String()
|
||||
for _, ps := range pss[1:] {
|
||||
s += ", " + ps.String()
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// labelAndAnnotation holds labels and annotations
|
||||
type labelAndAnnotation struct {
|
||||
Labels datasource.Labels
|
||||
Annotations datasource.Labels
|
||||
}
|
||||
|
||||
func (la *labelAndAnnotation) String() string {
|
||||
return "Labels:" + la.Labels.String() + "\nAnnotations:" + la.Annotations.String()
|
||||
}
|
||||
|
||||
// labelsAndAnnotations is collection of LabelAndAnnotation
|
||||
type labelsAndAnnotations []labelAndAnnotation
|
||||
|
||||
func (la labelsAndAnnotations) Len() int { return len(la) }
|
||||
|
||||
func (la labelsAndAnnotations) Swap(i, j int) { la[i], la[j] = la[j], la[i] }
|
||||
func (la labelsAndAnnotations) Less(i, j int) bool {
|
||||
diff := datasource.LabelCompare(la[i].Labels, la[j].Labels)
|
||||
if diff != 0 {
|
||||
return diff < 0
|
||||
}
|
||||
return datasource.LabelCompare(la[i].Annotations, la[j].Annotations) < 0
|
||||
}
|
||||
|
||||
func (la labelsAndAnnotations) String() string {
|
||||
if len(la) == 0 {
|
||||
return "[]"
|
||||
}
|
||||
s := "[\n0:" + indentLines("\n"+la[0].String(), " ")
|
||||
for i, l := range la[1:] {
|
||||
s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ")
|
||||
}
|
||||
s += "\n]"
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// indentLines prefixes each line in the supplied string with the given "indent" string.
|
||||
func indentLines(lines, indent string) string {
|
||||
sb := strings.Builder{}
|
||||
n := strings.Split(lines, "\n")
|
||||
for i, l := range n {
|
||||
if i > 0 {
|
||||
sb.WriteString(indent)
|
||||
}
|
||||
sb.WriteString(l)
|
||||
if i != len(n)-1 {
|
||||
sb.WriteRune('\n')
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
@@ -1,443 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
vmalertconfig "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
storagePath string
|
||||
httpListenAddr = ":8880"
|
||||
// insert series from 1970-01-01T00:00:00
|
||||
testStartTime = time.Unix(0, 0).UTC()
|
||||
|
||||
testPromWriteHTTPPath = "http://127.0.0.1" + httpListenAddr + "/api/v1/write"
|
||||
testDataSourcePath = "http://127.0.0.1" + httpListenAddr + "/prometheus"
|
||||
testRemoteWritePath = "http://127.0.0.1" + httpListenAddr
|
||||
testHealthHTTPPath = "http://127.0.0.1" + httpListenAddr + "/health"
|
||||
|
||||
disableAlertgroupLabel bool
|
||||
)
|
||||
|
||||
const (
|
||||
testStoragePath = "vmalert-unittest"
|
||||
testLogLevel = "ERROR"
|
||||
)
|
||||
|
||||
// UnitTest runs unittest for files
|
||||
func UnitTest(files []string, disableGroupLabel bool) bool {
|
||||
if err := templates.Load([]string{}, true); err != nil {
|
||||
logger.Fatalf("failed to load template: %v", err)
|
||||
}
|
||||
storagePath = filepath.Join(os.TempDir(), testStoragePath)
|
||||
processFlags()
|
||||
vminsert.Init()
|
||||
vmselect.Init()
|
||||
// storagePath will be created again when closing vmselect, so remove it again.
|
||||
defer fs.MustRemoveAll(storagePath)
|
||||
defer vminsert.Stop()
|
||||
defer vmselect.Stop()
|
||||
disableAlertgroupLabel = disableGroupLabel
|
||||
return rulesUnitTest(files)
|
||||
}
|
||||
|
||||
func rulesUnitTest(files []string) bool {
|
||||
var failed bool
|
||||
for _, f := range files {
|
||||
if err := ruleUnitTest(f); err != nil {
|
||||
fmt.Println(" FAILED")
|
||||
fmt.Printf("\nfailed to run unit test for file %q: \n%v", f, err)
|
||||
failed = true
|
||||
} else {
|
||||
fmt.Println(" SUCCESS")
|
||||
}
|
||||
}
|
||||
return failed
|
||||
}
|
||||
|
||||
func ruleUnitTest(filename string) []error {
|
||||
fmt.Println("\nUnit Testing: ", filename)
|
||||
b, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return []error{fmt.Errorf("failed to read file: %w", err)}
|
||||
}
|
||||
|
||||
var unitTestInp unitTestFile
|
||||
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
|
||||
return []error{fmt.Errorf("failed to unmarshal file: %w", err)}
|
||||
}
|
||||
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
|
||||
return []error{fmt.Errorf("failed to resolve path for `rule_files`: %w", err)}
|
||||
}
|
||||
|
||||
if unitTestInp.EvaluationInterval.Duration() == 0 {
|
||||
fmt.Println("evaluation_interval set to 1m by default")
|
||||
unitTestInp.EvaluationInterval = &promutils.Duration{D: 1 * time.Minute}
|
||||
}
|
||||
|
||||
groupOrderMap := make(map[string]int)
|
||||
for i, gn := range unitTestInp.GroupEvalOrder {
|
||||
if _, ok := groupOrderMap[gn]; ok {
|
||||
return []error{fmt.Errorf("group name repeated in `group_eval_order`: %s", gn)}
|
||||
}
|
||||
groupOrderMap[gn] = i
|
||||
}
|
||||
|
||||
testGroups, err := vmalertconfig.Parse(unitTestInp.RuleFiles, nil, true)
|
||||
if err != nil {
|
||||
return []error{fmt.Errorf("failed to parse `rule_files`: %w", err)}
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for _, t := range unitTestInp.Tests {
|
||||
if err := verifyTestGroup(t); err != nil {
|
||||
errs = append(errs, err)
|
||||
continue
|
||||
}
|
||||
testErrs := t.test(unitTestInp.EvaluationInterval.Duration(), groupOrderMap, testGroups)
|
||||
errs = append(errs, testErrs...)
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func verifyTestGroup(group testGroup) error {
|
||||
var testGroupName string
|
||||
if group.TestGroupName != "" {
|
||||
testGroupName = fmt.Sprintf("testGroupName: %s\n", group.TestGroupName)
|
||||
}
|
||||
for _, at := range group.AlertRuleTests {
|
||||
if at.Alertname == "" {
|
||||
return fmt.Errorf("\n%s missing required filed \"alertname\"", testGroupName)
|
||||
}
|
||||
if !disableAlertgroupLabel && at.GroupName == "" {
|
||||
return fmt.Errorf("\n%s missing required filed \"groupname\" when flag \"disableAlertGroupLabel\" is false", testGroupName)
|
||||
}
|
||||
if disableAlertgroupLabel && at.GroupName != "" {
|
||||
return fmt.Errorf("\n%s shouldn't set filed \"groupname\" when flag \"disableAlertGroupLabel\" is true", testGroupName)
|
||||
}
|
||||
if at.EvalTime == nil {
|
||||
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
|
||||
}
|
||||
}
|
||||
for _, et := range group.MetricsqlExprTests {
|
||||
if et.Expr == "" {
|
||||
return fmt.Errorf("\n%s missing required filed \"expr\"", testGroupName)
|
||||
}
|
||||
if et.EvalTime == nil {
|
||||
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func processFlags() {
|
||||
flag.Parse()
|
||||
for _, fv := range []struct {
|
||||
flag string
|
||||
value string
|
||||
}{
|
||||
{flag: "storageDataPath", value: storagePath},
|
||||
{flag: "loggerLevel", value: testLogLevel},
|
||||
{flag: "search.disableCache", value: "true"},
|
||||
// set storage retention time to 100 years, allow to store series from 1970-01-01T00:00:00.
|
||||
{flag: "retentionPeriod", value: "100y"},
|
||||
{flag: "datasource.url", value: testDataSourcePath},
|
||||
{flag: "remoteWrite.url", value: testRemoteWritePath},
|
||||
} {
|
||||
// panics if flag doesn't exist
|
||||
if err := flag.Lookup(fv.flag).Value.Set(fv.value); err != nil {
|
||||
logger.Fatalf("unable to set %q with value %q, err: %v", fv.flag, fv.value, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setUp() {
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
go httpserver.Serve(httpListenAddr, false, func(w http.ResponseWriter, r *http.Request) bool {
|
||||
switch r.URL.Path {
|
||||
case "/prometheus/api/v1/query":
|
||||
if err := prometheus.QueryHandler(nil, time.Now(), w, r); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
return true
|
||||
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||
if err := promremotewrite.InsertHandler(r); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
return true
|
||||
default:
|
||||
}
|
||||
return false
|
||||
})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
readyCheckFunc := func() bool {
|
||||
resp, err := http.Get(testHealthHTTPPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
return resp.StatusCode == 200
|
||||
}
|
||||
checkCheck:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger.Fatalf("http server can't be ready in 30s")
|
||||
default:
|
||||
if readyCheckFunc() {
|
||||
break checkCheck
|
||||
}
|
||||
time.Sleep(3 * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func tearDown() {
|
||||
if err := httpserver.Stop(httpListenAddr); err != nil {
|
||||
logger.Errorf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
vmstorage.Stop()
|
||||
metrics.UnregisterAllMetrics()
|
||||
fs.MustRemoveAll(storagePath)
|
||||
}
|
||||
|
||||
// resolveAndGlobFilepaths joins all relative paths in a configuration
|
||||
// with a given base directory and replaces all globs with matching files.
|
||||
func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
|
||||
for i, rf := range utf.RuleFiles {
|
||||
if rf != "" && !filepath.IsAbs(rf) {
|
||||
utf.RuleFiles[i] = filepath.Join(baseDir, rf)
|
||||
}
|
||||
}
|
||||
|
||||
var globbedFiles []string
|
||||
for _, rf := range utf.RuleFiles {
|
||||
m, err := filepath.Glob(rf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(m) == 0 {
|
||||
fmt.Fprintln(os.Stderr, " WARNING: no file match pattern", rf)
|
||||
}
|
||||
globbedFiles = append(globbedFiles, m...)
|
||||
}
|
||||
utf.RuleFiles = globbedFiles
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, testGroups []vmalertconfig.Group) (checkErrs []error) {
|
||||
// set up vmstorage and http server for ingest and read queries
|
||||
setUp()
|
||||
// tear down vmstorage and clean the data dir
|
||||
defer tearDown()
|
||||
|
||||
err := writeInputSeries(tg.InputSeries, tg.Interval, testStartTime, testPromWriteHTTPPath)
|
||||
if err != nil {
|
||||
return []error{err}
|
||||
}
|
||||
|
||||
q, err := datasource.Init(nil)
|
||||
if err != nil {
|
||||
return []error{fmt.Errorf("failed to init datasource: %v", err)}
|
||||
}
|
||||
rw, err := remotewrite.NewDebugClient()
|
||||
if err != nil {
|
||||
return []error{fmt.Errorf("failed to init wr: %v", err)}
|
||||
}
|
||||
|
||||
alertEvalTimesMap := map[time.Duration]struct{}{}
|
||||
alertExpResultMap := map[time.Duration]map[string]map[string][]expAlert{}
|
||||
for _, at := range tg.AlertRuleTests {
|
||||
et := at.EvalTime.Duration()
|
||||
alertEvalTimesMap[et] = struct{}{}
|
||||
if _, ok := alertExpResultMap[et]; !ok {
|
||||
alertExpResultMap[et] = make(map[string]map[string][]expAlert)
|
||||
}
|
||||
if _, ok := alertExpResultMap[et][at.GroupName]; !ok {
|
||||
alertExpResultMap[et][at.GroupName] = make(map[string][]expAlert)
|
||||
}
|
||||
alertExpResultMap[et][at.GroupName][at.Alertname] = at.ExpAlerts
|
||||
}
|
||||
alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap))
|
||||
for k := range alertEvalTimesMap {
|
||||
alertEvalTimes = append(alertEvalTimes, k)
|
||||
}
|
||||
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||
})
|
||||
|
||||
// sort group eval order according to the given "group_eval_order".
|
||||
sort.Slice(testGroups, func(i, j int) bool {
|
||||
return groupOrderMap[testGroups[i].Name] < groupOrderMap[testGroups[j].Name]
|
||||
})
|
||||
|
||||
// create groups with given rule
|
||||
var groups []*rule.Group
|
||||
for _, group := range testGroups {
|
||||
ng := rule.NewGroup(group, q, time.Minute, tg.ExternalLabels)
|
||||
groups = append(groups, ng)
|
||||
}
|
||||
|
||||
evalIndex := 0
|
||||
maxEvalTime := testStartTime.Add(tg.maxEvalTime())
|
||||
for ts := testStartTime; ts.Before(maxEvalTime) || ts.Equal(maxEvalTime); ts = ts.Add(evalInterval) {
|
||||
for _, g := range groups {
|
||||
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||
ts, err))
|
||||
}
|
||||
}
|
||||
// flush series after each group evaluation
|
||||
vmstorage.Storage.DebugFlush()
|
||||
}
|
||||
|
||||
// check alert_rule_test case at every eval time
|
||||
for evalIndex < len(alertEvalTimes) {
|
||||
if ts.Sub(testStartTime) > alertEvalTimes[evalIndex] ||
|
||||
alertEvalTimes[evalIndex] >= ts.Add(evalInterval).Sub(testStartTime) {
|
||||
break
|
||||
}
|
||||
gotAlertsMap := map[string]map[string]labelsAndAnnotations{}
|
||||
for _, g := range groups {
|
||||
if disableAlertgroupLabel {
|
||||
g.Name = ""
|
||||
}
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name]; !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := gotAlertsMap[g.Name]; !ok {
|
||||
gotAlertsMap[g.Name] = make(map[string]labelsAndAnnotations)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
ar, isAlertRule := r.(*rule.AlertingRule)
|
||||
if !isAlertRule {
|
||||
continue
|
||||
}
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name][ar.Name]; ok {
|
||||
for _, got := range ar.GetAlerts() {
|
||||
if got.State != notifier.StateFiring {
|
||||
continue
|
||||
}
|
||||
if disableAlertgroupLabel {
|
||||
delete(got.Labels, "alertgroup")
|
||||
}
|
||||
laa := labelAndAnnotation{
|
||||
Labels: datasource.ConvertToLabels(got.Labels),
|
||||
Annotations: datasource.ConvertToLabels(got.Annotations),
|
||||
}
|
||||
gotAlertsMap[g.Name][ar.Name] = append(gotAlertsMap[g.Name][ar.Name], laa)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
for groupname, gres := range alertExpResultMap[alertEvalTimes[evalIndex]] {
|
||||
for alertname, res := range gres {
|
||||
var expAlerts labelsAndAnnotations
|
||||
for _, expAlert := range res {
|
||||
if expAlert.ExpLabels == nil {
|
||||
expAlert.ExpLabels = make(map[string]string)
|
||||
}
|
||||
// alertGroupNameLabel is added as additional labels when `disableAlertGroupLabel` is false
|
||||
if !disableAlertgroupLabel {
|
||||
expAlert.ExpLabels["alertgroup"] = groupname
|
||||
}
|
||||
// alertNameLabel is added as additional labels in vmalert.
|
||||
expAlert.ExpLabels["alertname"] = alertname
|
||||
expAlerts = append(expAlerts, labelAndAnnotation{
|
||||
Labels: datasource.ConvertToLabels(expAlert.ExpLabels),
|
||||
Annotations: datasource.ConvertToLabels(expAlert.ExpAnnotations),
|
||||
})
|
||||
}
|
||||
sort.Sort(expAlerts)
|
||||
|
||||
gotAlerts := gotAlertsMap[groupname][alertname]
|
||||
sort.Sort(gotAlerts)
|
||||
if !reflect.DeepEqual(expAlerts, gotAlerts) {
|
||||
var testGroupName string
|
||||
if tg.TestGroupName != "" {
|
||||
testGroupName = fmt.Sprintf("testGroupName: %s,\n", tg.TestGroupName)
|
||||
}
|
||||
expString := indentLines(expAlerts.String(), " ")
|
||||
gotString := indentLines(gotAlerts.String(), " ")
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\n%s groupname: %s, alertname: %s, time: %s, \n exp:%v, \n got:%v ",
|
||||
testGroupName, groupname, alertname, alertEvalTimes[evalIndex].String(), expString, gotString))
|
||||
}
|
||||
}
|
||||
}
|
||||
evalIndex++
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
checkErrs = append(checkErrs, checkMetricsqlCase(tg.MetricsqlExprTests, q)...)
|
||||
return checkErrs
|
||||
}
|
||||
|
||||
// unitTestFile holds the contents of a single unit test file
|
||||
type unitTestFile struct {
|
||||
RuleFiles []string `yaml:"rule_files"`
|
||||
EvaluationInterval *promutils.Duration `yaml:"evaluation_interval"`
|
||||
GroupEvalOrder []string `yaml:"group_eval_order"`
|
||||
Tests []testGroup `yaml:"tests"`
|
||||
}
|
||||
|
||||
// testGroup is a group of input series and test cases associated with it
|
||||
type testGroup struct {
|
||||
Interval *promutils.Duration `yaml:"interval"`
|
||||
InputSeries []series `yaml:"input_series"`
|
||||
AlertRuleTests []alertTestCase `yaml:"alert_rule_test"`
|
||||
MetricsqlExprTests []metricsqlTestCase `yaml:"metricsql_expr_test"`
|
||||
ExternalLabels map[string]string `yaml:"external_labels"`
|
||||
TestGroupName string `yaml:"name"`
|
||||
}
|
||||
|
||||
// maxEvalTime returns the max eval time among all alert_rule_test and metricsql_expr_test
|
||||
func (tg *testGroup) maxEvalTime() time.Duration {
|
||||
var maxd time.Duration
|
||||
for _, alert := range tg.AlertRuleTests {
|
||||
if alert.EvalTime.Duration() > maxd {
|
||||
maxd = alert.EvalTime.Duration()
|
||||
}
|
||||
}
|
||||
for _, met := range tg.MetricsqlExprTests {
|
||||
if met.EvalTime.Duration() > maxd {
|
||||
maxd = met.EvalTime.Duration()
|
||||
}
|
||||
}
|
||||
return maxd
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
package unittest
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
if err := templates.Load([]string{}, true); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestUnitRule(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
disableGroupLabel bool
|
||||
files []string
|
||||
failed bool
|
||||
}{
|
||||
{
|
||||
name: "run multi files",
|
||||
files: []string{"./testdata/test1.yaml", "./testdata/test2.yaml"},
|
||||
failed: false,
|
||||
},
|
||||
{
|
||||
name: "disable group label",
|
||||
disableGroupLabel: true,
|
||||
files: []string{"./testdata/disable-group-label.yaml"},
|
||||
failed: false,
|
||||
},
|
||||
{
|
||||
name: "failing test",
|
||||
files: []string{"./testdata/failed-test.yaml"},
|
||||
failed: true,
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
fail := UnitTest(tc.files, tc.disableGroupLabel)
|
||||
if fail != tc.failed {
|
||||
t.Fatalf("failed to test %s, expect %t, got %t", tc.name, tc.failed, fail)
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,10 +1,11 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -30,7 +31,6 @@ type AlertingRule struct {
|
||||
Annotations map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
File string
|
||||
EvalInterval time.Duration
|
||||
Debug bool
|
||||
|
||||
@@ -48,15 +48,14 @@ type AlertingRule struct {
|
||||
}
|
||||
|
||||
type alertingRuleMetrics struct {
|
||||
errors *utils.Counter
|
||||
errors *utils.Gauge
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
seriesFetched *utils.Gauge
|
||||
}
|
||||
|
||||
// NewAlertingRule creates a new AlertingRule
|
||||
func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
||||
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
||||
ar := &AlertingRule{
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
@@ -68,7 +67,6 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
File: group.File,
|
||||
EvalInterval: group.Interval,
|
||||
Debug: cfg.Debug,
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
@@ -82,18 +80,13 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
entrySize := *ruleUpdateEntriesLimit
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
entrySize = *cfg.UpdateEntriesLimit
|
||||
}
|
||||
if entrySize < 1 {
|
||||
entrySize = 1
|
||||
}
|
||||
ar.state = &ruleState{
|
||||
entries: make([]StateEntry, entrySize),
|
||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
@@ -118,32 +111,39 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{%s}`, labels))
|
||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
return float64(e.Samples)
|
||||
return float64(e.samples)
|
||||
})
|
||||
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.SeriesFetched == nil {
|
||||
if e.seriesFetched == nil {
|
||||
// means seriesFetched is unsupported
|
||||
return -1
|
||||
}
|
||||
seriesFetched := float64(*e.SeriesFetched)
|
||||
if seriesFetched == 0 && e.Samples > 0 {
|
||||
seriesFetched := float64(*e.seriesFetched)
|
||||
if seriesFetched == 0 && e.samples > 0 {
|
||||
// `alert: 0.95` will fetch no series
|
||||
// but will get one time series in response.
|
||||
seriesFetched = float64(e.Samples)
|
||||
seriesFetched = float64(e.samples)
|
||||
}
|
||||
return seriesFetched
|
||||
})
|
||||
return ar
|
||||
}
|
||||
|
||||
// close unregisters rule metrics
|
||||
func (ar *AlertingRule) close() {
|
||||
// Close unregisters rule metrics
|
||||
func (ar *AlertingRule) Close() {
|
||||
ar.metrics.active.Unregister()
|
||||
ar.metrics.pending.Unregister()
|
||||
ar.metrics.errors.Unregister()
|
||||
@@ -162,27 +162,6 @@ func (ar *AlertingRule) ID() uint64 {
|
||||
return ar.RuleID
|
||||
}
|
||||
|
||||
// GetAlerts returns active alerts of rule
|
||||
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
var alerts []*notifier.Alert
|
||||
for _, a := range ar.alerts {
|
||||
alerts = append(alerts, a)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns alert if id exists
|
||||
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
if ar.alerts == nil {
|
||||
return nil
|
||||
}
|
||||
return ar.alerts[id]
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
|
||||
if !ar.Debug {
|
||||
return
|
||||
@@ -209,26 +188,6 @@ func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string
|
||||
logger.Infof("%s", prefix+msg)
|
||||
}
|
||||
|
||||
// updateWith copies all significant fields.
|
||||
// alerts state isn't copied since
|
||||
// it should be updated in next 2 Execs
|
||||
func (ar *AlertingRule) updateWith(r Rule) error {
|
||||
nr, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
|
||||
}
|
||||
ar.Expr = nr.Expr
|
||||
ar.For = nr.For
|
||||
ar.KeepFiringFor = nr.KeepFiringFor
|
||||
ar.Labels = nr.Labels
|
||||
ar.Annotations = nr.Annotations
|
||||
ar.EvalInterval = nr.EvalInterval
|
||||
ar.Debug = nr.Debug
|
||||
ar.q = nr.q
|
||||
ar.state = nr.state
|
||||
return nil
|
||||
}
|
||||
|
||||
type labelSet struct {
|
||||
// origin labels extracted from received time series
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
@@ -237,30 +196,11 @@ type labelSet struct {
|
||||
origin map[string]string
|
||||
// processed labels includes origin labels
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of key conflicts, origin labels are renamed with prefix `exported_` and extra labels are preferred.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5161
|
||||
// in case of conflicts, extra labels are preferred.
|
||||
// used as labels attached to notifier.Alert and ALERTS series written to remote storage.
|
||||
processed map[string]string
|
||||
}
|
||||
|
||||
// add adds a value v with key k to origin and processed label sets.
|
||||
// On k conflicts in processed set, the passed v is preferred.
|
||||
// On k conflicts in origin set, the original value is preferred and copied
|
||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||
func (ls *labelSet) add(k, v string) {
|
||||
ls.processed[k] = v
|
||||
ov, ok := ls.origin[k]
|
||||
if !ok {
|
||||
ls.origin[k] = v
|
||||
return
|
||||
}
|
||||
if ov != v {
|
||||
// copy value only if v and ov are different
|
||||
key := fmt.Sprintf("exported_%s", k)
|
||||
ls.processed[key] = ov
|
||||
}
|
||||
}
|
||||
|
||||
// toLabels converts labels from given Metric
|
||||
// to labelSet which contains original and processed labels.
|
||||
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
@@ -283,49 +223,50 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.add(k, v)
|
||||
ls.processed[k] = v
|
||||
if _, ok := ls.origin[k]; !ok {
|
||||
ls.origin[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// set additional labels to identify group and rule name
|
||||
if ar.Name != "" {
|
||||
ls.add(alertNameLabel, ar.Name)
|
||||
ls.processed[alertNameLabel] = ar.Name
|
||||
if _, ok := ls.origin[alertNameLabel]; !ok {
|
||||
ls.origin[alertNameLabel] = ar.Name
|
||||
}
|
||||
}
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||
ls.processed[alertGroupNameLabel] = ar.GroupName
|
||||
if _, ok := ls.origin[alertGroupNameLabel]; !ok {
|
||||
ls.origin[alertGroupNameLabel] = ar.GroupName
|
||||
}
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// execRange executes alerting rule on the given time range similarly to exec.
|
||||
// When making consecutive calls make sure to respect time linearity for start and end params,
|
||||
// as this function modifies AlertingRule alerts state.
|
||||
// It is not thread safe.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as a result.
|
||||
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
// ExecRange executes alerting rule on the given time range similarly to Exec.
|
||||
// It doesn't update internal states of the Rule and meant to be used just
|
||||
// to get time series for backfilling.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result []prompbmarshal.TimeSeries
|
||||
holdAlertState := make(map[uint64]*notifier.Alert)
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
|
||||
}
|
||||
for _, s := range res.Data {
|
||||
ls, err := ar.toLabels(s, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
}
|
||||
|
||||
// if alert is instant, For: 0
|
||||
if ar.For == 0 {
|
||||
if ar.For == 0 { // if alert is instant
|
||||
a.State = notifier.StateFiring
|
||||
for i := range s.Values {
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
@@ -337,32 +278,18 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
at := time.Unix(s.Timestamps[i], 0)
|
||||
// try to restore alert's state on the first iteration
|
||||
if at.Equal(start) {
|
||||
if _, ok := ar.alerts[h]; ok {
|
||||
a = ar.alerts[h]
|
||||
prevT = at
|
||||
}
|
||||
}
|
||||
if at.Sub(prevT) > ar.EvalInterval {
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
a.Start = time.Time{}
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For {
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = at
|
||||
}
|
||||
prevT = at
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
|
||||
// save alert's state on last iteration, so it can be used on the next execRange call
|
||||
if at.Equal(end) {
|
||||
holdAlertState[h] = a
|
||||
}
|
||||
}
|
||||
}
|
||||
ar.alerts = holdAlertState
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -370,26 +297,23 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
// is kept in memory state and consequently repeatedly sent to the AlertManager.
|
||||
const resolvedRetention = 15 * time.Minute
|
||||
|
||||
// exec executes AlertingRule expression via the given Querier.
|
||||
// Exec executes AlertingRule expression via the given Querier.
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
curState := StateEntry{
|
||||
Time: start,
|
||||
At: ts,
|
||||
Duration: time.Since(start),
|
||||
Samples: len(res.Data),
|
||||
SeriesFetched: res.SeriesFetched,
|
||||
Err: err,
|
||||
Curl: requestToCurl(req),
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(res.Data),
|
||||
seriesFetched: res.SeriesFetched,
|
||||
err: err,
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
if curState.Err != nil {
|
||||
ar.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
@@ -399,7 +323,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.Samples, curState.Duration)
|
||||
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
|
||||
|
||||
for h, a := range ar.alerts {
|
||||
// cleanup inactive alerts from previous Exec
|
||||
@@ -418,14 +342,15 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
for _, m := range res.Data {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.Err = fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, curState.Err
|
||||
curState.err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, curState.err
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused the removal of `__name__` label
|
||||
curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.Err
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.err
|
||||
}
|
||||
updated[h] = struct{}{}
|
||||
if a, ok := ar.alerts[h]; ok {
|
||||
@@ -448,8 +373,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, start, qFn)
|
||||
if err != nil {
|
||||
curState.Err = fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, curState.Err
|
||||
curState.err = fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, curState.err
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
@@ -498,8 +423,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
}
|
||||
if limit > 0 && numActivePending > limit {
|
||||
ar.alerts = map[uint64]*notifier.Alert{}
|
||||
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
return nil, curState.Err
|
||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
return nil, curState.err
|
||||
}
|
||||
return ar.toTimeSeries(ts.Unix()), nil
|
||||
}
|
||||
@@ -516,6 +441,26 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries
|
||||
return tss
|
||||
}
|
||||
|
||||
// UpdateWith copies all significant fields.
|
||||
// alerts state isn't copied since
|
||||
// it should be updated in next 2 Execs
|
||||
func (ar *AlertingRule) UpdateWith(r Rule) error {
|
||||
nr, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
|
||||
}
|
||||
ar.Expr = nr.Expr
|
||||
ar.For = nr.For
|
||||
ar.KeepFiringFor = nr.KeepFiringFor
|
||||
ar.Labels = nr.Labels
|
||||
ar.Annotations = nr.Annotations
|
||||
ar.EvalInterval = nr.EvalInterval
|
||||
ar.Debug = nr.Debug
|
||||
ar.q = nr.q
|
||||
ar.state = nr.state
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: consider hashing algorithm in VM
|
||||
func hash(labels map[string]string) uint64 {
|
||||
hash := fnv.New64a()
|
||||
@@ -542,7 +487,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
if ls == nil {
|
||||
ls, err = ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
}
|
||||
a := ¬ifier.Alert{
|
||||
@@ -558,6 +503,102 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
return a, err
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return ar.newAlertAPI(*a)
|
||||
}
|
||||
|
||||
// ToAPI returns Rule representation in form of APIRule
|
||||
// Isn't thread-safe. Call must be protected by AlertingRule mutex.
|
||||
func (ar *AlertingRule) ToAPI() APIRule {
|
||||
lastState := ar.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "alerting",
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
LastSeriesFetched: lastState.seriesFetched,
|
||||
MaxUpdates: ar.state.size(),
|
||||
Updates: ar.state.getAll(),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
}
|
||||
if lastState.err != nil {
|
||||
r.LastError = lastState.err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
// satisfy APIRule.State logic
|
||||
if len(r.Alerts) > 0 {
|
||||
r.State = notifier.StatePending.String()
|
||||
stateFiring := notifier.StateFiring.String()
|
||||
for _, a := range r.Alerts {
|
||||
if a.State == stateFiring {
|
||||
r.State = stateFiring
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// AlertsToAPI generates list of APIAlert objects from existing alerts
|
||||
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
|
||||
var alerts []*APIAlert
|
||||
ar.alertsMu.RLock()
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateInactive {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, ar.newAlertAPI(*a))
|
||||
}
|
||||
ar.alertsMu.RUnlock()
|
||||
return alerts
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
|
||||
aa := &APIAlert{
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", a.ID),
|
||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||
RuleID: fmt.Sprintf("%d", ar.RuleID),
|
||||
|
||||
Name: a.Name,
|
||||
Expression: ar.Expr,
|
||||
Labels: a.Labels,
|
||||
Annotations: a.Annotations,
|
||||
State: a.State.String(),
|
||||
ActiveAt: a.ActiveAt,
|
||||
Restored: a.Restored,
|
||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||
}
|
||||
if alertURLGeneratorFn != nil {
|
||||
aa.SourceLink = alertURLGeneratorFn(a)
|
||||
}
|
||||
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
||||
aa.Stabilizing = true
|
||||
}
|
||||
return aa
|
||||
}
|
||||
|
||||
const (
|
||||
// alertMetricName is the metric name for synthetic alert timeseries.
|
||||
alertMetricName = "ALERTS"
|
||||
@@ -605,10 +646,10 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
|
||||
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
||||
}
|
||||
|
||||
// restore restores the value of ActiveAt field for active alerts,
|
||||
// Restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
@@ -620,41 +661,44 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
return nil
|
||||
}
|
||||
|
||||
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
|
||||
if !*disableAlertGroupLabel {
|
||||
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
|
||||
}
|
||||
var labelsFilter string
|
||||
for k, v := range ar.Labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
if len(res.Data) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
return nil
|
||||
}
|
||||
for _, series := range res.Data {
|
||||
series.DelLabel("__name__")
|
||||
labelSet := make(map[string]string, len(series.Labels))
|
||||
for _, v := range series.Labels {
|
||||
labelSet[v.Name] = v.Value
|
||||
}
|
||||
id := hash(labelSet)
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, a := range ar.alerts {
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(series.Values[0]), 0)
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
qMetrics := res.Data
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.Restored = true
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
@@ -1,9 +1,8 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -14,7 +13,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
@@ -305,13 +303,13 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
for i, step := range tc.steps {
|
||||
fq.Reset()
|
||||
fq.Add(step...)
|
||||
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
fq.reset()
|
||||
fq.add(step...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
// artificial delay between applying steps
|
||||
@@ -348,18 +346,15 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
expHoldAlertStateAlerts map[uint64]*notifier.Alert
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("empty", 0),
|
||||
[]datasource.Metric{},
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("empty labels", 0),
|
||||
@@ -369,7 +364,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing", 0),
|
||||
@@ -382,7 +376,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
State: notifier.StateFiring,
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing-on-range", 0),
|
||||
@@ -394,7 +387,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Second),
|
||||
@@ -406,16 +398,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-pending",
|
||||
Labels: map[string]string{"alertname": "for-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-firing", 3*time.Second),
|
||||
@@ -427,38 +409,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-firing",
|
||||
Labels: map[string]string{"alertname": "for-firing"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-hold-pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-hold-pending",
|
||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
||||
@@ -472,14 +422,12 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multi-series", 3*time.Second),
|
||||
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
{
|
||||
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
||||
},
|
||||
},
|
||||
@@ -487,49 +435,22 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
{
|
||||
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
||||
//
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
|
||||
}},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{"alertname": "multi-series"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series", "foo": "bar"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
|
||||
{
|
||||
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
||||
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
||||
},
|
||||
},
|
||||
@@ -550,16 +471,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
"source": "vm",
|
||||
}},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.Add(tc.data...)
|
||||
gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
|
||||
fq.add(tc.data...)
|
||||
gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
@@ -585,35 +506,30 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
|
||||
}
|
||||
}
|
||||
if tc.expHoldAlertStateAlerts != nil {
|
||||
if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
|
||||
t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroup_Restore(t *testing.T) {
|
||||
defaultTS := time.Now()
|
||||
fqr := &datasource.FakeQuerierWithRegistry{}
|
||||
fqr := &fakeQuerierWithRegistry{}
|
||||
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
defer fqr.Reset()
|
||||
defer fqr.reset()
|
||||
|
||||
for _, r := range rules {
|
||||
fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
}
|
||||
|
||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} }
|
||||
fg.Start(context.Background(), nts, nil, fqr)
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
|
||||
fg.start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
fg.Close()
|
||||
fg.close()
|
||||
wg.Wait()
|
||||
|
||||
gotAlerts := make(map[uint64]*notifier.Alert)
|
||||
@@ -642,9 +558,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
if got.Name != exp.Name {
|
||||
t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -660,29 +573,26 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
fqr.Reset()
|
||||
fqr.reset()
|
||||
|
||||
// one active alert with state restore
|
||||
ts := time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
@@ -690,20 +600,17 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, two with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
@@ -712,82 +619,74 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// one active alert but wrong state restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
stateMetric("wrong alert", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with labels
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with restore labels missmatch
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq := &fakeQuerier{}
|
||||
ar := newTestAlertingRule("test", 0)
|
||||
ar.Labels = map[string]string{"job": "test"}
|
||||
ar.q = fq
|
||||
|
||||
// successful attempt
|
||||
// label `job` will be overridden by rule extra label, the original value will be reserved by "exported_job"
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err := ar.exec(context.TODO(), time.Now(), 0)
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
_, err := ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// label `__name__` will be omitted and get duplicated results here
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo_1", "job", "bar"))
|
||||
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||
// label `job` will collide with rule extra label and will make both time series equal
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if !errors.Is(err, errDuplicate) {
|
||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||
}
|
||||
|
||||
fq.Reset()
|
||||
fq.reset()
|
||||
|
||||
expErr := "connection reset by peer"
|
||||
fq.SetErr(errors.New(expErr))
|
||||
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||
fq.setErr(errors.New(expErr))
|
||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get err; got nil")
|
||||
}
|
||||
@@ -797,7 +696,7 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRuleLimit(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq := &fakeQuerier{}
|
||||
ar := newTestAlertingRule("test", 0)
|
||||
ar.Labels = map[string]string{"job": "test"}
|
||||
ar.q = fq
|
||||
@@ -829,15 +728,15 @@ func TestAlertingRuleLimit(t *testing.T) {
|
||||
err error
|
||||
timestamp = time.Now()
|
||||
)
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
||||
for _, testCase := range testCases {
|
||||
_, err = ar.exec(context.TODO(), timestamp, testCase.limit)
|
||||
_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
|
||||
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
fq.Reset()
|
||||
fq.reset()
|
||||
}
|
||||
|
||||
func TestAlertingRule_Template(t *testing.T) {
|
||||
@@ -901,22 +800,20 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "override label",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "bar"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "bar"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "bar",
|
||||
alertNameLabel: "override label",
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
@@ -945,18 +842,13 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
}): {
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
@@ -968,12 +860,12 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
|
||||
fq.Add(tc.metrics...)
|
||||
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
tc.rule.state = newRuleState(10)
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
for hash, expAlert := range tc.expAlerts {
|
||||
@@ -1087,10 +979,7 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
For: waitFor,
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
metrics: &alertingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{alertname=%q}`, name)),
|
||||
},
|
||||
state: newRuleState(10),
|
||||
}
|
||||
return &rule
|
||||
}
|
||||
@@ -1100,54 +989,3 @@ func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.
|
||||
rule.KeepFiringFor = keepFiringFor
|
||||
return rule
|
||||
}
|
||||
|
||||
func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
metric := datasource.Metric{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "instance", Value: "0.0.0.0:8800"},
|
||||
{Name: "group", Value: "vmalert"},
|
||||
{Name: "alertname", Value: "ConfigurationReloadFailure"},
|
||||
},
|
||||
Values: []float64{1},
|
||||
Timestamps: []int64{time.Now().UnixNano()},
|
||||
}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Labels: map[string]string{
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
},
|
||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||
Name: "AlertingRulesError",
|
||||
GroupName: "vmalert",
|
||||
}
|
||||
|
||||
expectedOriginLabels := map[string]string{
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
expectedProcessedLabels := map[string]string{
|
||||
"instance": "override",
|
||||
"exported_instance": "0.0.0.0:8800",
|
||||
"alertname": "AlertingRulesError",
|
||||
"exported_alertname": "ConfigurationReloadFailure",
|
||||
"group": "vmalert",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
ls, err := ar.toLabels(metric, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||
t.Errorf("origin labels mismatch, got: %v, want: %v", ls.origin, expectedOriginLabels)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.processed, expectedProcessedLabels) {
|
||||
t.Errorf("processed labels mismatch, got: %v, want: %v", ls.processed, expectedProcessedLabels)
|
||||
}
|
||||
}
|
||||
@@ -19,14 +19,10 @@ import (
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutils.Duration `yaml:"eval_delay,omitempty"`
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
@@ -42,8 +38,6 @@ type Group struct {
|
||||
Headers []Header `yaml:"headers,omitempty"`
|
||||
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
|
||||
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
|
||||
// EvalAlignment will make the timestamp of group query requests be aligned with interval
|
||||
EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
@@ -69,27 +63,11 @@ func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate checks configuration errors for group and internal rules
|
||||
// Validate check for internal Group or Rule configuration errors
|
||||
func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool) error {
|
||||
if g.Name == "" {
|
||||
return fmt.Errorf("group name must be set")
|
||||
}
|
||||
if g.Interval.Duration() < 0 {
|
||||
return fmt.Errorf("interval shouldn't be lower than 0")
|
||||
}
|
||||
if g.EvalOffset.Duration() < 0 {
|
||||
return fmt.Errorf("eval_offset shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
|
||||
if g.EvalOffset.Duration() > g.Interval.Duration() {
|
||||
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
}
|
||||
if g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||
}
|
||||
if g.Concurrency < 0 {
|
||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||
}
|
||||
|
||||
uniqueRules := map[uint64]struct{}{}
|
||||
for _, r := range g.Rules {
|
||||
@@ -98,26 +76,26 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
ruleName = r.Alert
|
||||
}
|
||||
if _, ok := uniqueRules[r.ID]; ok {
|
||||
return fmt.Errorf("%q is a duplicate in group", r.String())
|
||||
return fmt.Errorf("%q is a duplicate within the group %q", r.String(), g.Name)
|
||||
}
|
||||
uniqueRules[r.ID] = struct{}{}
|
||||
if err := r.Validate(); err != nil {
|
||||
return fmt.Errorf("invalid rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if validateExpressions {
|
||||
// its needed only for tests.
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateTplFn != nil {
|
||||
if err := validateTplFn(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if err := validateTplFn(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -236,7 +214,7 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
return parse(files, validateTplFn, validateExpressions)
|
||||
}
|
||||
@@ -245,11 +223,11 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
groups, err := parse(files, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
|
||||
return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
|
||||
@@ -68,10 +68,6 @@ func TestParseBad(t *testing.T) {
|
||||
path []string
|
||||
expErr string
|
||||
}{
|
||||
{
|
||||
[]string{"testdata/rules/rules_interval_bad.rules"},
|
||||
"eval_offset should be smaller than interval",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/rules/rules0-bad.rules"},
|
||||
"unexpected token",
|
||||
@@ -106,7 +102,7 @@ func TestParseBad(t *testing.T) {
|
||||
},
|
||||
{
|
||||
[]string{"http://unreachable-url"},
|
||||
"failed to",
|
||||
"failed to read",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
@@ -145,35 +141,6 @@ func TestGroup_Validate(t *testing.T) {
|
||||
group: &Group{},
|
||||
expErr: "group name must be set",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "negative interval",
|
||||
Interval: promutils.NewDuration(-1),
|
||||
},
|
||||
expErr: "interval shouldn't be lower than 0",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "wrong eval_offset",
|
||||
Interval: promutils.NewDuration(time.Minute),
|
||||
EvalOffset: promutils.NewDuration(2 * time.Minute),
|
||||
},
|
||||
expErr: "eval_offset should be smaller than interval",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "wrong limit",
|
||||
Limit: -1,
|
||||
},
|
||||
expErr: "invalid limit",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "wrong concurrency",
|
||||
Concurrency: -1,
|
||||
},
|
||||
expErr: "invalid concurrency",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
|
||||
@@ -49,7 +49,7 @@ func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
path, resp.StatusCode, http.StatusOK, data)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ groups:
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
eval_delay: 30s
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
groups:
|
||||
- name: groupTest
|
||||
## default interval is 1min, eval_offset shouldn't be greater than interval
|
||||
eval_offset: 2m
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 2s
|
||||
expr: sum(rate(vm_http_request_errors_total[2s])) > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
@@ -1,131 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FakeQuerier is a mock querier that return predefined results and error message
|
||||
type FakeQuerier struct {
|
||||
sync.Mutex
|
||||
metrics []Metric
|
||||
err error
|
||||
}
|
||||
|
||||
// SetErr sets query error message
|
||||
func (fq *FakeQuerier) SetErr(err error) {
|
||||
fq.Lock()
|
||||
fq.err = err
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// Reset reset querier's error message and results
|
||||
func (fq *FakeQuerier) Reset() {
|
||||
fq.Lock()
|
||||
fq.err = nil
|
||||
fq.metrics = fq.metrics[:0]
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// Add appends metrics to querier result metrics
|
||||
func (fq *FakeQuerier) Add(metrics ...Metric) {
|
||||
fq.Lock()
|
||||
fq.metrics = append(fq.metrics, metrics...)
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
// BuildWithParams return FakeQuerier itself
|
||||
func (fq *FakeQuerier) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fq
|
||||
}
|
||||
|
||||
// QueryRange performs query
|
||||
func (fq *FakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||
req, _, err := fq.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
// Query returns metrics restored in querier
|
||||
func (fq *FakeQuerier) Query(_ context.Context, _ string, _ time.Time) (Result, *http.Request, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
return Result{}, nil, fq.err
|
||||
}
|
||||
cp := make([]Metric, len(fq.metrics))
|
||||
copy(cp, fq.metrics)
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
return Result{Data: cp}, req, nil
|
||||
}
|
||||
|
||||
// FakeQuerierWithRegistry can store different results for different query expr
|
||||
type FakeQuerierWithRegistry struct {
|
||||
sync.Mutex
|
||||
registry map[string][]Metric
|
||||
}
|
||||
|
||||
// Set stores query result for given key
|
||||
func (fqr *FakeQuerierWithRegistry) Set(key string, metrics ...Metric) {
|
||||
fqr.Lock()
|
||||
if fqr.registry == nil {
|
||||
fqr.registry = make(map[string][]Metric)
|
||||
}
|
||||
fqr.registry[key] = metrics
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
// Reset clean querier's results registry
|
||||
func (fqr *FakeQuerierWithRegistry) Reset() {
|
||||
fqr.Lock()
|
||||
fqr.registry = nil
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
// BuildWithParams returns itself
|
||||
func (fqr *FakeQuerierWithRegistry) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fqr
|
||||
}
|
||||
|
||||
// QueryRange performs query
|
||||
func (fqr *FakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||
req, _, err := fqr.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
// Query returns metrics restored in querier registry
|
||||
func (fqr *FakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (Result, *http.Request, error) {
|
||||
fqr.Lock()
|
||||
defer fqr.Unlock()
|
||||
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
metrics, ok := fqr.registry[expr]
|
||||
if !ok {
|
||||
return Result{}, req, nil
|
||||
}
|
||||
cp := make([]Metric, len(metrics))
|
||||
copy(cp, metrics)
|
||||
return Result{Data: cp}, req, nil
|
||||
}
|
||||
|
||||
// FakeQuerierWithDelay mock querier with given delay duration
|
||||
type FakeQuerierWithDelay struct {
|
||||
FakeQuerier
|
||||
Delay time.Duration
|
||||
}
|
||||
|
||||
// Query returns query result after delay duration
|
||||
func (fqd *FakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (Result, *http.Request, error) {
|
||||
timer := time.NewTimer(fqd.Delay)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-timer.C:
|
||||
}
|
||||
return fqd.FakeQuerier.Query(ctx, expr, ts)
|
||||
}
|
||||
|
||||
// BuildWithParams returns itself
|
||||
func (fqd *FakeQuerierWithDelay) BuildWithParams(_ QuerierParams) Querier {
|
||||
return fqd
|
||||
}
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -37,24 +36,18 @@ var (
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", `Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used`)
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", `Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used`)
|
||||
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url")
|
||||
oauth2EndpointParams = flag.String("datasource.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -datasource.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url. ")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url.")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url. ")
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side `+
|
||||
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
|
||||
`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Deprecated: please use "eval_alignment" in rule group instead. `+
|
||||
`Whether to align "time" parameter with evaluation interval. `+
|
||||
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. "+
|
||||
"See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
|
||||
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
|
||||
`If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.`)
|
||||
@@ -86,12 +79,6 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
if *addr == "" {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
if !*queryTimeAlignment {
|
||||
logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
|
||||
}
|
||||
if *lookBack != 0 {
|
||||
logger.Warnf("flag `-datasource.lookback` will be deprecated soon. Please use `-rule.evalDelay` command-line flag instead. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
|
||||
}
|
||||
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
@@ -110,22 +97,14 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
extraParams.Set("round_digits", fmt.Sprintf("%d", *roundDigits))
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
}
|
||||
_, err = authCfg.GetAuthHeader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
|
||||
}
|
||||
|
||||
return &VMStorage{
|
||||
c: &http.Client{Transport: tr},
|
||||
|
||||
@@ -37,14 +37,11 @@ type VMStorage struct {
|
||||
appendTypePrefix bool
|
||||
lookBack time.Duration
|
||||
queryStep time.Duration
|
||||
dataSourceType datasourceType
|
||||
|
||||
// evaluationInterval will help setting request's `step` param.
|
||||
dataSourceType datasourceType
|
||||
evaluationInterval time.Duration
|
||||
// extraParams contains params to be attached to each HTTP request
|
||||
extraParams url.Values
|
||||
// extraHeaders are headers to be attached to each HTTP request
|
||||
extraHeaders []keyValue
|
||||
extraParams url.Values
|
||||
extraHeaders []keyValue
|
||||
|
||||
// whether to print additional log messages
|
||||
// for each sent request
|
||||
@@ -137,35 +134,33 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati
|
||||
|
||||
// Query executes the given query and returns parsed response
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
|
||||
req, err := s.newQueryRequest(query, ts)
|
||||
req, err := s.newRequestPOST()
|
||||
if err != nil {
|
||||
return Result{}, nil, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return Result{}, nil, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRequest(query, ts)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
|
||||
switch s.dataSourceType {
|
||||
case "", datasourcePrometheus:
|
||||
s.setPrometheusInstantReqParams(req, query, ts)
|
||||
case datasourceGraphite:
|
||||
s.setGraphiteReqParams(req, query, ts)
|
||||
default:
|
||||
return Result{}, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
return Result{}, req, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
parseFn := parsePrometheusResponse
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
parseFn = parseGraphiteResponse
|
||||
}
|
||||
result, err := parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return result, req, err
|
||||
}
|
||||
|
||||
@@ -176,38 +171,25 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
|
||||
}
|
||||
req, err := s.newRequestPOST()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
if start.IsZero() {
|
||||
return res, fmt.Errorf("start param is missing")
|
||||
}
|
||||
if end.IsZero() {
|
||||
return res, fmt.Errorf("end param is missing")
|
||||
}
|
||||
req, err := s.newQueryRangeRequest(query, start, end)
|
||||
s.setPrometheusRangeReqParams(req, query, start, end)
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return res, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRangeRequest(query, start, end)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
res, err = parsePrometheusResponse(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return res, err
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
return parsePrometheusResponse(req, resp)
|
||||
}
|
||||
|
||||
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
|
||||
@@ -219,6 +201,11 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
|
||||
}
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
resp, err = s.c.Do(req.WithContext(ctx))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
|
||||
}
|
||||
@@ -230,42 +217,14 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
|
||||
}
|
||||
s.setPrometheusRangeReqParams(req, query, start, end)
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
|
||||
}
|
||||
switch s.dataSourceType {
|
||||
case "", datasourcePrometheus:
|
||||
s.setPrometheusInstantReqParams(req, query, ts)
|
||||
case datasourceGraphite:
|
||||
s.setGraphiteReqParams(req, query, ts)
|
||||
default:
|
||||
logger.Panicf("BUG: engine not found: %q", s.dataSourceType)
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (s *VMStorage) newRequest() (*http.Request, error) {
|
||||
func (s *VMStorage) newRequestPOST() (*http.Request, error) {
|
||||
req, err := http.NewRequest(http.MethodPost, s.datasourceURL, nil)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", s.datasourceURL, err)
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if s.authCfg != nil {
|
||||
err = s.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.authCfg.SetHeaders(req, true)
|
||||
}
|
||||
for _, h := range s.extraHeaders {
|
||||
req.Header.Set(h.key, h.value)
|
||||
|
||||
@@ -112,14 +112,14 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
}
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
var pi promInstant
|
||||
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
@@ -164,6 +164,10 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
|
||||
if s.lookBack > 0 {
|
||||
timestamp = timestamp.Add(-s.lookBack)
|
||||
}
|
||||
if *queryTimeAlignment && s.evaluationInterval > 0 {
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
|
||||
timestamp = timestamp.Truncate(s.evaluationInterval)
|
||||
}
|
||||
q.Set("time", timestamp.Format(time.RFC3339))
|
||||
if !*disableStepParam && s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
|
||||
@@ -506,7 +506,8 @@ func TestRequestParams(t *testing.T) {
|
||||
},
|
||||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 15 * time.Second
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
tt := timestamp.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
},
|
||||
@@ -520,6 +521,7 @@ func TestRequestParams(t *testing.T) {
|
||||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 15 * time.Second
|
||||
tt := timestamp.Add(-time.Minute)
|
||||
tt = tt.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
@@ -547,7 +549,8 @@ func TestRequestParams(t *testing.T) {
|
||||
},
|
||||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 3 * time.Hour
|
||||
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
tt := timestamp.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {tt.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
},
|
||||
@@ -637,9 +640,9 @@ func TestRequestParams(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req, err := tc.vm.newRequest()
|
||||
req, err := tc.vm.newRequestPOST()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
switch tc.vm.dataSourceType {
|
||||
case "", datasourcePrometheus:
|
||||
@@ -735,9 +738,9 @@ func TestHeaders(t *testing.T) {
|
||||
for _, tt := range testCases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
vm := tt.vmFn()
|
||||
req, err := vm.newQueryRequest("foo", time.Now())
|
||||
req, err := vm.newRequestPOST()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tt.checkFn(t, req)
|
||||
})
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/url"
|
||||
@@ -13,7 +11,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
@@ -23,34 +21,16 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the `time` parameter for rule evaluation requests to compensate intentional data delay from the datasource."+
|
||||
"Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect).")
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
)
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
type Group struct {
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
// EvalDelay will adjust timestamp for rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *time.Duration
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
Limit int
|
||||
Concurrency int
|
||||
Checksum string
|
||||
@@ -71,9 +51,6 @@ type Group struct {
|
||||
evalCancel context.CancelFunc
|
||||
|
||||
metrics *groupMetrics
|
||||
// evalAlignment will make the timestamp of group query
|
||||
// requests be aligned with interval
|
||||
evalAlignment *bool
|
||||
}
|
||||
|
||||
type groupMetrics struct {
|
||||
@@ -115,8 +92,7 @@ func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[s
|
||||
return r
|
||||
}
|
||||
|
||||
// NewGroup returns a new group
|
||||
func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
g := &Group{
|
||||
Type: cfg.Type,
|
||||
Name: cfg.Name,
|
||||
@@ -129,7 +105,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
Headers: make(map[string]string),
|
||||
NotifierHeaders: make(map[string]string),
|
||||
Labels: cfg.Labels,
|
||||
evalAlignment: cfg.EvalAlignment,
|
||||
|
||||
doneCh: make(chan struct{}),
|
||||
finishedCh: make(chan struct{}),
|
||||
@@ -141,12 +116,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
if g.Concurrency < 1 {
|
||||
g.Concurrency = 1
|
||||
}
|
||||
if cfg.EvalOffset != nil {
|
||||
g.EvalOffset = &cfg.EvalOffset.D
|
||||
}
|
||||
if cfg.EvalDelay != nil {
|
||||
g.EvalDelay = &cfg.EvalDelay.D
|
||||
}
|
||||
for _, h := range cfg.Headers {
|
||||
g.Headers[h.Key] = h.Value
|
||||
}
|
||||
@@ -176,11 +145,11 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
return g
|
||||
}
|
||||
|
||||
func (g *Group) newRule(qb datasource.QuerierBuilder, r config.Rule) Rule {
|
||||
if r.Alert != "" {
|
||||
return NewAlertingRule(qb, g, r)
|
||||
func (g *Group) newRule(qb datasource.QuerierBuilder, rule config.Rule) Rule {
|
||||
if rule.Alert != "" {
|
||||
return newAlertingRule(qb, g, rule)
|
||||
}
|
||||
return NewRecordingRule(qb, g, r)
|
||||
return newRecordingRule(qb, g, rule)
|
||||
}
|
||||
|
||||
// ID return unique group ID that consists of
|
||||
@@ -194,15 +163,11 @@ func (g *Group) ID() uint64 {
|
||||
hash.Write([]byte("\xff"))
|
||||
hash.Write([]byte(g.Name))
|
||||
hash.Write([]byte(g.Type.Get()))
|
||||
hash.Write([]byte(g.Interval.String()))
|
||||
if g.EvalOffset != nil {
|
||||
hash.Write([]byte(g.EvalOffset.String()))
|
||||
}
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
// restore restores alerts state for group rules
|
||||
func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
// Restore restores alerts state for group rules
|
||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
@@ -218,7 +183,7 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||
Headers: g.Headers,
|
||||
Debug: ar.Debug,
|
||||
})
|
||||
if err := ar.restore(ctx, q, ts, lookback); err != nil {
|
||||
if err := ar.Restore(ctx, q, ts, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
}
|
||||
@@ -228,7 +193,7 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||
// updateWith updates existing group with
|
||||
// passed group object. This function ignores group
|
||||
// evaluation interval change. It supposed to be updated
|
||||
// in group.Start function.
|
||||
// in group.start function.
|
||||
// Not thread-safe.
|
||||
func (g *Group) updateWith(newGroup *Group) error {
|
||||
rulesRegistry := make(map[uint64]Rule)
|
||||
@@ -241,11 +206,11 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
if !ok {
|
||||
// old rule is not present in the new list
|
||||
// so we mark it for removing
|
||||
g.Rules[i].close()
|
||||
g.Rules[i].Close()
|
||||
g.Rules[i] = nil
|
||||
continue
|
||||
}
|
||||
if err := or.updateWith(nr); err != nil {
|
||||
if err := or.UpdateWith(nr); err != nil {
|
||||
return err
|
||||
}
|
||||
delete(rulesRegistry, nr.ID())
|
||||
@@ -278,10 +243,10 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// InterruptEval interrupts in-flight rules evaluations
|
||||
// interruptEval interrupts in-flight rules evaluations
|
||||
// within the group. It is expected that g.evalCancel
|
||||
// will be repopulated after the call.
|
||||
func (g *Group) InterruptEval() {
|
||||
func (g *Group) interruptEval() {
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
|
||||
@@ -290,13 +255,12 @@ func (g *Group) InterruptEval() {
|
||||
}
|
||||
}
|
||||
|
||||
// Close stops the group and it's rules, unregisters group metrics
|
||||
func (g *Group) Close() {
|
||||
func (g *Group) close() {
|
||||
if g.doneCh == nil {
|
||||
return
|
||||
}
|
||||
close(g.doneCh)
|
||||
g.InterruptEval()
|
||||
g.interruptEval()
|
||||
<-g.finishedCh
|
||||
|
||||
g.metrics.iterationDuration.Unregister()
|
||||
@@ -304,25 +268,24 @@ func (g *Group) Close() {
|
||||
g.metrics.iterationMissed.Unregister()
|
||||
g.metrics.iterationInterval.Unregister()
|
||||
for _, rule := range g.Rules {
|
||||
rule.close()
|
||||
rule.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// SkipRandSleepOnGroupStart will skip random sleep delay in group first evaluation
|
||||
var SkipRandSleepOnGroupStart bool
|
||||
var skipRandSleepOnGroupStart bool
|
||||
|
||||
// Start starts group's evaluation
|
||||
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over time in order to reduce load on datasource.
|
||||
if !SkipRandSleepOnGroupStart {
|
||||
sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
|
||||
g.infof("will start in %v", sleepBeforeStart)
|
||||
|
||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
|
||||
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += uint64(g.Interval)
|
||||
}
|
||||
randSleep -= sleepOffset
|
||||
sleepTimer := time.NewTimer(time.Duration(randSleep))
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
sleepTimer.Stop()
|
||||
@@ -332,17 +295,18 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
return
|
||||
case <-sleepTimer.C:
|
||||
}
|
||||
evalTS = evalTS.Add(sleepBeforeStart)
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
rw: rw,
|
||||
notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
evalTS := time.Now()
|
||||
|
||||
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) {
|
||||
g.metrics.iterationTotal.Inc()
|
||||
@@ -356,7 +320,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
}
|
||||
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
ts = g.adjustReqTimestamp(ts)
|
||||
errs := e.execConcurrently(ctx, g.Rules, ts, g.Concurrency, resolveDuration, g.Limit)
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
@@ -381,7 +344,7 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
@@ -412,12 +375,19 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
continue
|
||||
}
|
||||
|
||||
// ensure that staleness is tracked for existing rules only
|
||||
// ensure that staleness is tracked or existing rules only
|
||||
e.purgeStaleSeries(g.Rules)
|
||||
e.notifierHeaders = g.NotifierHeaders
|
||||
g.mu.Unlock()
|
||||
|
||||
g.infof("re-started")
|
||||
e.notifierHeaders = g.NotifierHeaders
|
||||
|
||||
if g.Interval != ng.Interval {
|
||||
g.Interval = ng.Interval
|
||||
t.Stop()
|
||||
t = time.NewTicker(g.Interval)
|
||||
evalTS = time.Now()
|
||||
}
|
||||
g.mu.Unlock()
|
||||
logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
case <-t.C:
|
||||
missed := (time.Since(evalTS) / g.Interval) - 1
|
||||
if missed < 0 {
|
||||
@@ -435,134 +405,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateWith inserts new group to updateCh
|
||||
func (g *Group) UpdateWith(new *Group) {
|
||||
g.updateCh <- new
|
||||
}
|
||||
|
||||
// DeepCopy returns a deep copy of group
|
||||
func (g *Group) DeepCopy() *Group {
|
||||
g.mu.RLock()
|
||||
data, _ := json.Marshal(g)
|
||||
g.mu.RUnlock()
|
||||
newG := Group{}
|
||||
_ = json.Unmarshal(data, &newG)
|
||||
newG.Rules = g.Rules
|
||||
return &newG
|
||||
}
|
||||
|
||||
// delayBeforeStart returns a duration on the interval between [ts..ts+interval].
|
||||
// delayBeforeStart accounts for `offset`, so returned duration should be always
|
||||
// bigger than the `offset`.
|
||||
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
|
||||
var randSleep time.Duration
|
||||
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
|
||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += interval
|
||||
}
|
||||
randSleep -= sleepOffset
|
||||
// check if `ts` after randSleep is before `offset`,
|
||||
// if it is, add extra eval_offset to randSleep.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3409.
|
||||
if offset != nil {
|
||||
tmpEvalTS := ts.Add(randSleep)
|
||||
if tmpEvalTS.Before(tmpEvalTS.Truncate(interval).Add(*offset)) {
|
||||
randSleep += *offset
|
||||
}
|
||||
}
|
||||
return randSleep
|
||||
}
|
||||
|
||||
func (g *Group) infof(format string, args ...interface{}) {
|
||||
msg := fmt.Sprintf(format, args...)
|
||||
logger.Infof("group %q %s; interval=%v; eval_offset=%v; concurrency=%d",
|
||||
g.Name, msg, g.Interval, g.EvalOffset, g.Concurrency)
|
||||
}
|
||||
|
||||
// Replay performs group replay
|
||||
func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool) int {
|
||||
var total int
|
||||
step := g.Interval * time.Duration(maxDataPoint)
|
||||
ri := rangeIterator{start: start, end: end, step: step}
|
||||
iterations := int(end.Sub(start)/step) + 1
|
||||
fmt.Printf("\nGroup %q"+
|
||||
"\ninterval: \t%v"+
|
||||
"\nrequests to make: \t%d"+
|
||||
"\nmax range per request: \t%v\n",
|
||||
g.Name, g.Interval, iterations, step)
|
||||
if g.Limit > 0 {
|
||||
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
|
||||
g.Limit)
|
||||
}
|
||||
for _, rule := range g.Rules {
|
||||
fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
|
||||
var bar *pb.ProgressBar
|
||||
if !disableProgressBar {
|
||||
bar = pb.StartNew(iterations)
|
||||
}
|
||||
ri.reset()
|
||||
for ri.next() {
|
||||
n, err := replayRule(rule, ri.s, ri.e, rw, replayRuleRetryAttempts)
|
||||
if err != nil {
|
||||
logger.Fatalf("rule %q: %s", rule, err)
|
||||
}
|
||||
total += n
|
||||
if bar != nil {
|
||||
bar.Increment()
|
||||
}
|
||||
}
|
||||
if bar != nil {
|
||||
bar.Finish()
|
||||
}
|
||||
// sleep to let remote storage to flush data on-disk
|
||||
// so chained rules could be calculated correctly
|
||||
time.Sleep(replayDelay)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
||||
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
if len(g.Rules) < 1 {
|
||||
return nil
|
||||
}
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
return e.execConcurrently(ctx, g.Rules, evalTS, g.Concurrency, resolveDuration, g.Limit)
|
||||
}
|
||||
|
||||
type rangeIterator struct {
|
||||
step time.Duration
|
||||
start, end time.Time
|
||||
|
||||
iter int
|
||||
s, e time.Time
|
||||
}
|
||||
|
||||
func (ri *rangeIterator) reset() {
|
||||
ri.iter = 0
|
||||
ri.s, ri.e = time.Time{}, time.Time{}
|
||||
}
|
||||
|
||||
func (ri *rangeIterator) next() bool {
|
||||
ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
|
||||
if !ri.end.After(ri.s) {
|
||||
return false
|
||||
}
|
||||
ri.e = ri.s.Add(ri.step)
|
||||
if ri.e.After(ri.end) {
|
||||
ri.e = ri.end
|
||||
}
|
||||
ri.iter++
|
||||
return true
|
||||
}
|
||||
|
||||
// getResolveDuration returns the duration after which firing alert
|
||||
// can be considered as resolved.
|
||||
func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Duration {
|
||||
@@ -576,49 +418,11 @@ func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Du
|
||||
return resolveDuration
|
||||
}
|
||||
|
||||
func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
if g.EvalOffset != nil {
|
||||
// calculate the min timestamp on the evaluationInterval
|
||||
intervalStart := timestamp.Truncate(g.Interval)
|
||||
ts := intervalStart.Add(*g.EvalOffset)
|
||||
if timestamp.Before(ts) {
|
||||
// if passed timestamp is before the expected evaluation offset,
|
||||
// then we should adjust it to the previous evaluation round.
|
||||
// E.g. request with evaluationInterval=1h and evaluationOffset=30m
|
||||
// was evaluated at 11:20. Then the timestamp should be adjusted
|
||||
// to 10:30, to the previous evaluationInterval.
|
||||
return ts.Add(-g.Interval)
|
||||
}
|
||||
// when `eval_offset` is using, ts shouldn't be effect by `eval_alignment` and `eval_delay`
|
||||
// since it should be always aligned.
|
||||
return ts
|
||||
}
|
||||
|
||||
timestamp = timestamp.Add(-g.getEvalDelay())
|
||||
|
||||
// always apply the alignment as a last step
|
||||
if g.evalAlignment == nil || *g.evalAlignment {
|
||||
// align query time with interval to get similar result with grafana when plotting time series.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
|
||||
return timestamp.Truncate(g.Interval)
|
||||
}
|
||||
return timestamp
|
||||
}
|
||||
|
||||
func (g *Group) getEvalDelay() time.Duration {
|
||||
if g.EvalDelay != nil {
|
||||
return *g.EvalDelay
|
||||
}
|
||||
return *evalDelay
|
||||
}
|
||||
|
||||
// executor contains group's notify and rw configs
|
||||
type executor struct {
|
||||
Notifiers func() []notifier.Notifier
|
||||
notifiers func() []notifier.Notifier
|
||||
notifierHeaders map[string]string
|
||||
|
||||
Rw remotewrite.RWClient
|
||||
rw *remotewrite.Client
|
||||
|
||||
previouslySentSeriesToRWMu sync.Mutex
|
||||
// previouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
@@ -628,7 +432,6 @@ type executor struct {
|
||||
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||
}
|
||||
|
||||
// execConcurrently executes rules concurrently if concurrency>1
|
||||
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.Time, concurrency int, resolveDuration time.Duration, limit int) chan error {
|
||||
res := make(chan error, len(rules))
|
||||
if concurrency == 1 {
|
||||
@@ -643,14 +446,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
||||
sem := make(chan struct{}, concurrency)
|
||||
go func() {
|
||||
wg := sync.WaitGroup{}
|
||||
for _, r := range rules {
|
||||
for _, rule := range rules {
|
||||
sem <- struct{}{}
|
||||
wg.Add(1)
|
||||
go func(r Rule) {
|
||||
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(r)
|
||||
}(rule)
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
@@ -663,12 +466,15 @@ var (
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
)
|
||||
|
||||
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
||||
func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
||||
execTotal.Inc()
|
||||
|
||||
tss, err := r.exec(ctx, ts, limit)
|
||||
tss, err := rule.Exec(ctx, ts, limit)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
// the context can be cancelled on graceful shutdown
|
||||
@@ -676,15 +482,17 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return nil
|
||||
}
|
||||
execErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
||||
}
|
||||
|
||||
if e.Rw != nil {
|
||||
if e.rw != nil {
|
||||
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
||||
var lastErr error
|
||||
for _, ts := range tss {
|
||||
if err := e.Rw.Push(ts); err != nil {
|
||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
|
||||
remoteWriteTotal.Inc()
|
||||
if err := e.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
@@ -693,13 +501,13 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return err
|
||||
}
|
||||
|
||||
staleSeries := e.getStaleSeries(r, tss, ts)
|
||||
staleSeries := e.getStaleSeries(rule, tss, ts)
|
||||
if err := pushToRW(staleSeries); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
ar, ok := r.(*AlertingRule)
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
@@ -711,11 +519,11 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
errGr := new(utils.ErrGroup)
|
||||
for _, nt := range e.Notifiers() {
|
||||
for _, nt := range e.notifiers() {
|
||||
wg.Add(1)
|
||||
go func(nt notifier.Notifier) {
|
||||
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", rule, nt.Addr(), err))
|
||||
}
|
||||
wg.Done()
|
||||
}(nt)
|
||||
@@ -725,7 +533,7 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
}
|
||||
|
||||
// getStaledSeries checks whether there are stale series from previously sent ones.
|
||||
func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
|
||||
func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
|
||||
ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
|
||||
for _, ts := range tss {
|
||||
// convert labels to strings so we can compare with previously sent series
|
||||
@@ -733,7 +541,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
|
||||
ruleLabels[key] = ts.Labels
|
||||
}
|
||||
|
||||
rID := r.ID()
|
||||
rID := rule.ID()
|
||||
var staleS []prompbmarshal.TimeSeries
|
||||
// check whether there are series which disappeared and need to be marked as stale
|
||||
e.previouslySentSeriesToRWMu.Lock()
|
||||
@@ -1,22 +1,16 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
@@ -25,15 +19,7 @@ import (
|
||||
func init() {
|
||||
// Disable rand sleep on group start during tests in order to speed up test execution.
|
||||
// Rand sleep is needed only in prod code.
|
||||
SkipRandSleepOnGroupStart = true
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
if err := templates.Load([]string{}, true); err != nil {
|
||||
fmt.Println("failed to load template for test")
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
skipRandSleepOnGroupStart = true
|
||||
}
|
||||
|
||||
func TestUpdateWith(t *testing.T) {
|
||||
@@ -49,19 +35,18 @@ func TestUpdateWith(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"update alerting rule",
|
||||
[]config.Rule{
|
||||
{
|
||||
Alert: "foo",
|
||||
Expr: "up > 0",
|
||||
For: promutils.NewDuration(time.Second),
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "{{ $value|humanize }}",
|
||||
"description": "{{$labels}}",
|
||||
},
|
||||
[]config.Rule{{
|
||||
Alert: "foo",
|
||||
Expr: "up > 0",
|
||||
For: promutils.NewDuration(time.Second),
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "{{ $value|humanize }}",
|
||||
"description": "{{$labels}}",
|
||||
},
|
||||
},
|
||||
{
|
||||
Alert: "bar",
|
||||
Expr: "up > 0",
|
||||
@@ -69,8 +54,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
[]config.Rule{
|
||||
{
|
||||
Alert: "foo",
|
||||
@@ -91,8 +75,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
"update recording rule",
|
||||
@@ -151,7 +134,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
g := &Group{Name: "test"}
|
||||
qb := &datasource.FakeQuerier{}
|
||||
qb := &fakeQuerier{}
|
||||
for _, r := range tc.currentRules {
|
||||
r.ID = config.HashRule(r)
|
||||
g.Rules = append(g.Rules, g.newRule(qb, r))
|
||||
@@ -183,7 +166,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
if got.ID() != want.ID() {
|
||||
t.Fatalf("expected to have rule %q; got %q", want, got)
|
||||
}
|
||||
if err := CompareRules(t, got, want); err != nil {
|
||||
if err := compareRules(t, got, want); err != nil {
|
||||
t.Fatalf("comparison error: %s", err)
|
||||
}
|
||||
}
|
||||
@@ -192,31 +175,17 @@ func TestUpdateWith(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGroupStart(t *testing.T) {
|
||||
const (
|
||||
rules = `
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
`
|
||||
)
|
||||
var groups []config.Group
|
||||
err := yaml.Unmarshal([]byte(rules), &groups)
|
||||
// TODO: make parsing from string instead of file
|
||||
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
|
||||
fs := &datasource.FakeQuerier{}
|
||||
fn := ¬ifier.FakeNotifier{}
|
||||
fs := &fakeQuerier{}
|
||||
fn := &fakeNotifier{}
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||
g := newGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||
g.Concurrency = 2
|
||||
|
||||
const inst1, inst2, job = "foo", "bar", "baz"
|
||||
@@ -231,7 +200,7 @@ func TestGroupStart(t *testing.T) {
|
||||
alert1.State = notifier.StateFiring
|
||||
// add external label
|
||||
alert1.Labels["cluster"] = "east-1"
|
||||
// add rule labels
|
||||
// add rule labels - see config/testdata/rules1-good.rules
|
||||
alert1.Labels["label"] = "bar"
|
||||
alert1.Labels["host"] = inst1
|
||||
// add service labels
|
||||
@@ -246,7 +215,7 @@ func TestGroupStart(t *testing.T) {
|
||||
alert2.State = notifier.StateFiring
|
||||
// add external label
|
||||
alert2.Labels["cluster"] = "east-1"
|
||||
// add rule labels
|
||||
// add rule labels - see config/testdata/rules1-good.rules
|
||||
alert2.Labels["label"] = "bar"
|
||||
alert2.Labels["host"] = inst2
|
||||
// add service labels
|
||||
@@ -255,40 +224,40 @@ func TestGroupStart(t *testing.T) {
|
||||
alert2.ID = hash(alert2.Labels)
|
||||
|
||||
finished := make(chan struct{})
|
||||
fs.Add(m1)
|
||||
fs.Add(m2)
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
// wait for multiple evals
|
||||
time.Sleep(20 * evalInterval)
|
||||
|
||||
gotAlerts := fn.GetAlerts()
|
||||
gotAlerts := fn.getAlerts()
|
||||
expectedAlerts := []notifier.Alert{*alert1, *alert2}
|
||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||
|
||||
gotAlertsNum := fn.GetCounter()
|
||||
gotAlertsNum := fn.getCounter()
|
||||
if gotAlertsNum < len(expectedAlerts)*2 {
|
||||
t.Fatalf("expected to receive at least %d alerts; got %d instead",
|
||||
len(expectedAlerts)*2, gotAlertsNum)
|
||||
}
|
||||
|
||||
// reset previous data
|
||||
fs.Reset()
|
||||
fs.reset()
|
||||
// and set only one datapoint for response
|
||||
fs.Add(m1)
|
||||
fs.add(m1)
|
||||
|
||||
// wait for multiple evals
|
||||
time.Sleep(20 * evalInterval)
|
||||
|
||||
gotAlerts = fn.GetAlerts()
|
||||
gotAlerts = fn.getAlerts()
|
||||
alert2.State = notifier.StateInactive
|
||||
expectedAlerts = []notifier.Alert{*alert1, *alert2}
|
||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||
|
||||
g.Close()
|
||||
g.close()
|
||||
<-finished
|
||||
}
|
||||
|
||||
@@ -323,13 +292,13 @@ func TestGetStaleSeries(t *testing.T) {
|
||||
e := &executor{
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
|
||||
f := func(rule Rule, labels, expLabels [][]prompbmarshal.Label) {
|
||||
t.Helper()
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
for _, l := range labels {
|
||||
tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
|
||||
}
|
||||
staleS := e.getStaleSeries(r, tss, ts)
|
||||
staleS := e.getStaleSeries(rule, tss, ts)
|
||||
if staleS == nil && expLabels == nil {
|
||||
return
|
||||
}
|
||||
@@ -465,17 +434,17 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFaultyNotifier(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
|
||||
r := newTestAlertingRule("instant", 0)
|
||||
r.q = fq
|
||||
|
||||
fn := ¬ifier.FakeNotifier{}
|
||||
fn := &fakeNotifier{}
|
||||
e := &executor{
|
||||
Notifiers: func() []notifier.Notifier {
|
||||
notifiers: func() []notifier.Notifier {
|
||||
return []notifier.Notifier{
|
||||
¬ifier.FaultyNotifier{},
|
||||
&faultyNotifier{},
|
||||
fn,
|
||||
}
|
||||
},
|
||||
@@ -491,7 +460,7 @@ func TestFaultyNotifier(t *testing.T) {
|
||||
tn := time.Now()
|
||||
deadline := tn.Add(delay / 2)
|
||||
for {
|
||||
if fn.GetCounter() > 0 {
|
||||
if fn.getCounter() > 0 {
|
||||
return
|
||||
}
|
||||
if tn.After(deadline) {
|
||||
@@ -504,17 +473,17 @@ func TestFaultyNotifier(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFaultyRW(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
|
||||
r := &RecordingRule{
|
||||
Name: "test",
|
||||
state: newRuleState(10),
|
||||
q: fq,
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
Rw: &remotewrite.Client{},
|
||||
rw: &remotewrite.Client{},
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
@@ -525,38 +494,23 @@ func TestFaultyRW(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
const (
|
||||
rules = `
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
`
|
||||
)
|
||||
var groups []config.Group
|
||||
err := yaml.Unmarshal([]byte(rules), &groups)
|
||||
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
|
||||
const delay = time.Second * 2
|
||||
fq := &datasource.FakeQuerierWithDelay{Delay: delay}
|
||||
fq := &fakeQuerierWithDelay{delay: delay}
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||
g := newGroup(groups[0], fq, evalInterval, nil)
|
||||
|
||||
go g.Start(context.Background(), nil, nil, nil)
|
||||
go g.start(context.Background(), nil, nil, nil)
|
||||
|
||||
time.Sleep(evalInterval * 20)
|
||||
|
||||
go func() {
|
||||
g.Close()
|
||||
g.close()
|
||||
}()
|
||||
|
||||
deadline := time.Tick(delay / 2)
|
||||
@@ -566,225 +520,3 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
case <-g.finishedCh:
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroupStartDelay(t *testing.T) {
|
||||
g := &Group{}
|
||||
// interval of 5min and key generate a static delay of 30s
|
||||
g.Interval = time.Minute * 5
|
||||
key := uint64(math.MaxUint64 / 10)
|
||||
|
||||
f := func(atS, expS string) {
|
||||
t.Helper()
|
||||
at, err := time.Parse(time.RFC3339Nano, atS)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expTS, err := time.Parse(time.RFC3339Nano, expS)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
|
||||
gotStart := at.Add(delay)
|
||||
if expTS != gotStart {
|
||||
t.Errorf("expected to get %v; got %v instead", expTS, gotStart)
|
||||
}
|
||||
}
|
||||
|
||||
// test group without offset
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:00.999+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
|
||||
|
||||
// test group with offset smaller than above fixed randSleep,
|
||||
// this way randSleep will always be enough
|
||||
offset := 20 * time.Second
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
|
||||
|
||||
// test group with offset bigger than above fixed randSleep,
|
||||
// this way offset will be added to delay
|
||||
offset = 3 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:03:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:03:30.000+00:00")
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:08:30.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:30.000+00:00")
|
||||
f("2023-01-01T00:07:30.000+00:00", "2023-01-01T00:13:30.000+00:00")
|
||||
|
||||
offset = 10 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
// interval of 1h and key generate a static delay of 6m
|
||||
g.Interval = time.Hour
|
||||
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
|
||||
f("2023-01-01T00:05:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
|
||||
f("2023-01-01T00:30:00.000+00:00", "2023-01-01T01:16:00.000+00:00")
|
||||
}
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
offset := 30 * time.Minute
|
||||
evalDelay := 1 * time.Minute
|
||||
disableAlign := false
|
||||
testCases := []struct {
|
||||
name string
|
||||
g *Group
|
||||
originTS, expTS string
|
||||
}{
|
||||
{
|
||||
"with query align + default evalDelay",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"without query align + default evalDelay",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:10:30+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset, find previous offset point + default evalDelay",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T10:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset + default evalDelay",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"1h interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"1m interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Minute,
|
||||
},
|
||||
"2023-08-28T11:41:13+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
{
|
||||
"disable alignment with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRangeIterator(t *testing.T) {
|
||||
testCases := []struct {
|
||||
ri rangeIterator
|
||||
result [][2]time.Time
|
||||
}{
|
||||
{
|
||||
ri: rangeIterator{
|
||||
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
||||
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
||||
step: 5 * time.Minute,
|
||||
},
|
||||
result: [][2]time.Time{
|
||||
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||
},
|
||||
},
|
||||
{
|
||||
ri: rangeIterator{
|
||||
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
||||
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
||||
step: 45 * time.Minute,
|
||||
},
|
||||
result: [][2]time.Time{
|
||||
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||
},
|
||||
},
|
||||
{
|
||||
ri: rangeIterator{
|
||||
start: parseTime(t, "2021-01-01T12:00:12.000Z"),
|
||||
end: parseTime(t, "2021-01-01T12:00:17.000Z"),
|
||||
step: time.Second,
|
||||
},
|
||||
result: [][2]time.Time{
|
||||
{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
|
||||
{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
||||
var j int
|
||||
for tc.ri.next() {
|
||||
if len(tc.result) < j+1 {
|
||||
t.Fatalf("unexpected result for iterator on step %d: %v - %v",
|
||||
j, tc.ri.s, tc.ri.e)
|
||||
}
|
||||
s, e := tc.ri.s, tc.ri.e
|
||||
expS, expE := tc.result[j][0], tc.result[j][1]
|
||||
if s != expS {
|
||||
t.Fatalf("expected to get start=%v; got %v", expS, s)
|
||||
}
|
||||
if e != expE {
|
||||
t.Fatalf("expected to get end=%v; got %v", expE, e)
|
||||
}
|
||||
j++
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func parseTime(t *testing.T, s string) time.Time {
|
||||
t.Helper()
|
||||
tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return tt
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user