mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-18 17:26:31 +03:00
Compare commits
442 Commits
streaming-
...
streaming-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
26144f6d87 | ||
|
|
f89d16fc4c | ||
|
|
ff33e60a3d | ||
|
|
dab160cd74 | ||
|
|
a3b3ea4d73 | ||
|
|
9a353ee695 | ||
|
|
0c06934a59 | ||
|
|
5b419cfb2b | ||
|
|
71681fd1ca | ||
|
|
cf03e11d89 | ||
|
|
5bdf62de5b | ||
|
|
3c3450fc53 | ||
|
|
befcd93305 | ||
|
|
cc6819869a | ||
|
|
8040bdc1d6 | ||
|
|
3a1ef3184d | ||
|
|
f3c5687a04 | ||
|
|
1683df11f0 | ||
|
|
f2f0468ae7 | ||
|
|
ecce2d6db1 | ||
|
|
0f39c0e897 | ||
|
|
41932db848 | ||
|
|
c830064c2f | ||
|
|
b0287867fe | ||
|
|
a9fd130980 | ||
|
|
30d77393a5 | ||
|
|
4e4d7f4cbe | ||
|
|
19c04549a5 | ||
|
|
db6495560c | ||
|
|
4073bb3303 | ||
|
|
d365157381 | ||
|
|
846d5a3ab8 | ||
|
|
481471b872 | ||
|
|
a74f6d63e0 | ||
|
|
6eae3f6c8a | ||
|
|
9d886a2eb0 | ||
|
|
b49b8fed3c | ||
|
|
3ac44baebe | ||
|
|
d0e4190969 | ||
|
|
388d020b7c | ||
|
|
ce4f26db02 | ||
|
|
190a6565ae | ||
|
|
7fc2bd0412 | ||
|
|
bfa73ebdf3 | ||
|
|
51cdf3676b | ||
|
|
74219a1727 | ||
|
|
041a1966c5 | ||
|
|
191e322879 | ||
|
|
544da241e8 | ||
|
|
0c78b891b0 | ||
|
|
f51b7fda8e | ||
|
|
4b42c8abbb | ||
|
|
e14e3d9c8c | ||
|
|
5106045048 | ||
|
|
be509b3995 | ||
|
|
9fd20202e1 | ||
|
|
03a97dc678 | ||
|
|
4b8088e377 | ||
|
|
70cd09e736 | ||
|
|
d2c94a0663 | ||
|
|
a47127c1a6 | ||
|
|
c005245741 | ||
|
|
f2229c2e42 | ||
|
|
f405384c8c | ||
|
|
dd25049858 | ||
|
|
0597718435 | ||
|
|
828aca82e9 | ||
|
|
e5a767cff8 | ||
|
|
eae585e8de | ||
|
|
d374595e31 | ||
|
|
91ccea236f | ||
|
|
fe2d9f6646 | ||
|
|
b79d4cc988 | ||
|
|
e34f77aed4 | ||
|
|
bbea02f82b | ||
|
|
fdefc8a816 | ||
|
|
095d982976 | ||
|
|
105c6b2eb7 | ||
|
|
33df9bee22 | ||
|
|
463455665b | ||
|
|
eb08f5c7e5 | ||
|
|
1aa39efec1 | ||
|
|
07e5d6f0fb | ||
|
|
f75874f5df | ||
|
|
aecfabe318 | ||
|
|
47307c7a37 | ||
|
|
d0ca448093 | ||
|
|
35dd6e5e8e | ||
|
|
52692d001a | ||
|
|
95edeffbc6 | ||
|
|
910a39ad72 | ||
|
|
1f477aba41 | ||
|
|
be20501376 | ||
|
|
43d7de4afe | ||
|
|
34b69dcf58 | ||
|
|
8ba483eca3 | ||
|
|
7575f5c501 | ||
|
|
b4ba8d0d76 | ||
|
|
9678235eea | ||
|
|
fb90a56de2 | ||
|
|
8c1dcf4743 | ||
|
|
01f9edda64 | ||
|
|
160cc9debd | ||
|
|
67160d08a2 | ||
|
|
5ebd5a0d7b | ||
|
|
7a31f8a6c9 | ||
|
|
7cfde237ec | ||
|
|
5a88bc973f | ||
|
|
a35e52114b | ||
|
|
df012f1553 | ||
|
|
1fa619170b | ||
|
|
326a77c697 | ||
|
|
cecee0e05f | ||
|
|
bc3feebf69 | ||
|
|
11e2d41c77 | ||
|
|
afaf7f0b74 | ||
|
|
4b529562ce | ||
|
|
873f0deaa6 | ||
|
|
0379a0eb82 | ||
|
|
5ddccbc2b9 | ||
|
|
779bbc2e91 | ||
|
|
664fa5cb78 | ||
|
|
317834f876 | ||
|
|
9253c24dd6 | ||
|
|
cd277e3f84 | ||
|
|
0a6a2e455d | ||
|
|
5896fb129d | ||
|
|
0b0f565c31 | ||
|
|
72dbd24b22 | ||
|
|
df88baef07 | ||
|
|
66c76a4d4d | ||
|
|
2afb068f0f | ||
|
|
68be182075 | ||
|
|
6f15ca4a16 | ||
|
|
3cb5f88325 | ||
|
|
463a6e9ac6 | ||
|
|
304fe05650 | ||
|
|
7df8d19831 | ||
|
|
1a5cdb4790 | ||
|
|
0f91f83639 | ||
|
|
99384fce4f | ||
|
|
f7ef3d4aa1 | ||
|
|
92cadfff65 | ||
|
|
8f946a927c | ||
|
|
075bb8748f | ||
|
|
fb9a9d1463 | ||
|
|
242472086b | ||
|
|
6af732b6f7 | ||
|
|
e0fc5ef140 | ||
|
|
1e02efd511 | ||
|
|
715bf3af82 | ||
|
|
39c405ed4d | ||
|
|
27668c9d01 | ||
|
|
e13dc04fbf | ||
|
|
8fb68152e6 | ||
|
|
75196d7234 | ||
|
|
51df2248f0 | ||
|
|
635da5fab7 | ||
|
|
ce8ae450fc | ||
|
|
6d03779870 | ||
|
|
a5bc9d93cc | ||
|
|
3d3b0e31e0 | ||
|
|
b1fed78e0b | ||
|
|
c7504daa7a | ||
|
|
042267541f | ||
|
|
b05e1512d4 | ||
|
|
1065deccf8 | ||
|
|
8efe694160 | ||
|
|
74b09ab4de | ||
|
|
6acf28715b | ||
|
|
adc69b872c | ||
|
|
02a0a7f428 | ||
|
|
e373bb84d5 | ||
|
|
802adf3b65 | ||
|
|
b39e9257eb | ||
|
|
cb90d09c9d | ||
|
|
3a8b9cc81e | ||
|
|
7cb8ed8271 | ||
|
|
efbe25a678 | ||
|
|
67468a0c46 | ||
|
|
935bec447b | ||
|
|
65bc460323 | ||
|
|
e4f5039509 | ||
|
|
97373b7786 | ||
|
|
17e2b4f814 | ||
|
|
06c73df55a | ||
|
|
bc550e22d7 | ||
|
|
fdbbbf33ca | ||
|
|
902f1e5fdc | ||
|
|
0160435802 | ||
|
|
a28cc6ebec | ||
|
|
17900e39d7 | ||
|
|
d1aa15688a | ||
|
|
7c7a32efd7 | ||
|
|
f5c4fcc250 | ||
|
|
3532f52f4b | ||
|
|
c7a2e4e90a | ||
|
|
41291b6290 | ||
|
|
f62e03b3d2 | ||
|
|
487f6380d0 | ||
|
|
e1359c904c | ||
|
|
82a6e4efe5 | ||
|
|
760a530305 | ||
|
|
1911320c86 | ||
|
|
8eddccfbb4 | ||
|
|
837f6f0975 | ||
|
|
ec5b72c879 | ||
|
|
ac65c6b178 | ||
|
|
41f7940f97 | ||
|
|
7ca783dee9 | ||
|
|
48228031e4 | ||
|
|
98d0f81f21 | ||
|
|
543f218fe9 | ||
|
|
2291958648 | ||
|
|
5424632ba3 | ||
|
|
d7897e0d70 | ||
|
|
8a0bb4bf17 | ||
|
|
d024fcf37f | ||
|
|
5e38dde18d | ||
|
|
f42ec79958 | ||
|
|
c5b5895162 | ||
|
|
3d57cb3234 | ||
|
|
255bede1f2 | ||
|
|
fc2e7a30b3 | ||
|
|
48f0aa8483 | ||
|
|
5034aa0773 | ||
|
|
090cb2c9de | ||
|
|
a7800cdb95 | ||
|
|
2cd9cda12c | ||
|
|
0cf55ded34 | ||
|
|
06d2d933fb | ||
|
|
752f89f13f | ||
|
|
93b8bf66aa | ||
|
|
1831c731a3 | ||
|
|
e1fb9d9230 | ||
|
|
348482c575 | ||
|
|
0b0776a440 | ||
|
|
334a739ff6 | ||
|
|
8725b5e049 | ||
|
|
0715b4e121 | ||
|
|
389f34cb57 | ||
|
|
3a15c9ffb3 | ||
|
|
2b420b5c0a | ||
|
|
fb835ad658 | ||
|
|
d493da562e | ||
|
|
5d1ce9891b | ||
|
|
74fda0b311 | ||
|
|
95f12c7e28 | ||
|
|
e96b4410a1 | ||
|
|
c160a49908 | ||
|
|
a007a5a8a4 | ||
|
|
f3d47c3dc3 | ||
|
|
ba803a7cd2 | ||
|
|
81ddee4f3a | ||
|
|
fbab838dc0 | ||
|
|
cbe4a5c251 | ||
|
|
1ef6b7f32b | ||
|
|
20025d4fd6 | ||
|
|
3ffa8975d4 | ||
|
|
d3f919df3e | ||
|
|
4b7e6b36ce | ||
|
|
ae3107153c | ||
|
|
34a26397d7 | ||
|
|
75059f3feb | ||
|
|
727709da67 | ||
|
|
faee0e43d1 | ||
|
|
c8e6e47e2a | ||
|
|
657f3bdd21 | ||
|
|
3545633934 | ||
|
|
e9d86d7e52 | ||
|
|
aefd744abb | ||
|
|
aae06e003e | ||
|
|
84b3c1d3bc | ||
|
|
2ea03cf80d | ||
|
|
1fbd0dd9d8 | ||
|
|
8924fea33a | ||
|
|
8dfc874be3 | ||
|
|
61035419d5 | ||
|
|
2cbdb1db22 | ||
|
|
d389a4fcf3 | ||
|
|
ec7cac3641 | ||
|
|
0a28c8e91b | ||
|
|
98e73a4022 | ||
|
|
17c45d1206 | ||
|
|
85bf63078c | ||
|
|
7d4873bcef | ||
|
|
a79a439dac | ||
|
|
4c25ee3597 | ||
|
|
ebd76588da | ||
|
|
9b248e3b2f | ||
|
|
0ea35215f6 | ||
|
|
91f5c24f82 | ||
|
|
8d9e365512 | ||
|
|
741013a33f | ||
|
|
d9a7dea9a1 | ||
|
|
e837968e49 | ||
|
|
5bfa2a3e97 | ||
|
|
68d2cb203d | ||
|
|
02624c41eb | ||
|
|
ef2113e5df | ||
|
|
7b2e2a23c2 | ||
|
|
efd5d1f2dd | ||
|
|
201fb6ec5c | ||
|
|
3076c1f400 | ||
|
|
6a533023b1 | ||
|
|
0bf48a5d2a | ||
|
|
feff13851c | ||
|
|
3121d76bee | ||
|
|
cb106bdf39 | ||
|
|
5d61a7327d | ||
|
|
d3ae2b2f62 | ||
|
|
d6ae082598 | ||
|
|
fbc6289a21 | ||
|
|
f9bd265249 | ||
|
|
7cf7740d18 | ||
|
|
0116a333fb | ||
|
|
43e3302803 | ||
|
|
0e056ddb2d | ||
|
|
12aefa8a4b | ||
|
|
37997abd14 | ||
|
|
cef7a39ba3 | ||
|
|
89dcbc2fe7 | ||
|
|
8eed04b2c6 | ||
|
|
0feaeca3c1 | ||
|
|
8af56ea2ed | ||
|
|
cf23dc6480 | ||
|
|
b7fb7c5f77 | ||
|
|
3e93fa61ad | ||
|
|
c8d901424f | ||
|
|
66527c5981 | ||
|
|
6340911d38 | ||
|
|
4722b70c89 | ||
|
|
ba058a4514 | ||
|
|
25ff811d78 | ||
|
|
eded218e8c | ||
|
|
61594d2bd8 | ||
|
|
ec72b750f2 | ||
|
|
bfec8a3751 | ||
|
|
0df8489e0a | ||
|
|
230230cf0b | ||
|
|
80213f07fa | ||
|
|
2db1a664e1 | ||
|
|
010dc15d16 | ||
|
|
22498c5087 | ||
|
|
dc668b8246 | ||
|
|
a9a26c20b5 | ||
|
|
d407d13e7b | ||
|
|
2474281f1b | ||
|
|
73a1862182 | ||
|
|
930d26b2ff | ||
|
|
2b15f2fc8c | ||
|
|
746cc93e95 | ||
|
|
bffd30b57a | ||
|
|
5afc6a5765 | ||
|
|
b51d16e74c | ||
|
|
e4f44bad91 | ||
|
|
ea4ca70be1 | ||
|
|
4d2b98b755 | ||
|
|
c07dc45786 | ||
|
|
f90d2ec843 | ||
|
|
054367c421 | ||
|
|
323f3720ed | ||
|
|
da24c129db | ||
|
|
f2b373dd06 | ||
|
|
815fda8995 | ||
|
|
65db6609eb | ||
|
|
b5254199c6 | ||
|
|
6eb205f8b0 | ||
|
|
90d45574bf | ||
|
|
eeb25bc4a5 | ||
|
|
dd33fc0c76 | ||
|
|
536c1301fd | ||
|
|
87a86ec9db | ||
|
|
ed70a40669 | ||
|
|
828ddd4e4f | ||
|
|
4497a08e3d | ||
|
|
f59dda3223 | ||
|
|
b8739bc00b | ||
|
|
b44b74d118 | ||
|
|
324ecbeed6 | ||
|
|
da887b49e7 | ||
|
|
e482eeff58 | ||
|
|
92826b0b4a | ||
|
|
0189081490 | ||
|
|
3e76c3bd50 | ||
|
|
c4c6ee9485 | ||
|
|
a70818f72f | ||
|
|
ea81f6fc36 | ||
|
|
fba93dbe0b | ||
|
|
41a0fdaf39 | ||
|
|
c96fc05f3e | ||
|
|
51aab7bb17 | ||
|
|
714af89b13 | ||
|
|
98699f203b | ||
|
|
efb6ac27c2 | ||
|
|
68f82b1c06 | ||
|
|
1020faa7b4 | ||
|
|
aade16f534 | ||
|
|
a71efce784 | ||
|
|
4ac95b6f49 | ||
|
|
7ac49162c6 | ||
|
|
f6208965ce | ||
|
|
a950873fff | ||
|
|
a8051d48c4 | ||
|
|
0638bbe69c | ||
|
|
aaf9e3d526 | ||
|
|
9866974a53 | ||
|
|
8874b525b7 | ||
|
|
ca7457d906 | ||
|
|
23369321f1 | ||
|
|
abcb21aa5e | ||
|
|
e964df8039 | ||
|
|
a64b37cf24 | ||
|
|
ad839aa492 | ||
|
|
29cebd82fb | ||
|
|
9149353a36 | ||
|
|
613b545dfd | ||
|
|
90427abc65 | ||
|
|
632d788b63 | ||
|
|
7c90ce39cb | ||
|
|
3aec7eb44f | ||
|
|
b60bb1d98a | ||
|
|
68b1b3c4d4 | ||
|
|
cdbc06a639 | ||
|
|
8b41b506c2 | ||
|
|
5464376d16 | ||
|
|
ac933cc423 | ||
|
|
612dcf231a | ||
|
|
d5a599badc | ||
|
|
c22e3e7b1d | ||
|
|
eed5206376 | ||
|
|
4afcb2a689 | ||
|
|
cb34d4440c | ||
|
|
42dd71bb63 | ||
|
|
305c96e384 | ||
|
|
c07909a20b | ||
|
|
eed0c3c6b0 | ||
|
|
a216fe6728 | ||
|
|
c9375cac5e | ||
|
|
4e0a779efe | ||
|
|
003ef3a518 | ||
|
|
685f9c3c98 |
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -60,7 +60,7 @@ body:
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
|
||||
26
.github/workflows/check-licenses.yml
vendored
26
.github/workflows/check-licenses.yml
vendored
@@ -14,13 +14,25 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.21.3
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
|
||||
- name: Check License
|
||||
run: |
|
||||
make check-licenses
|
||||
run: make check-licenses
|
||||
|
||||
19
.github/workflows/codeql-analysis.yml
vendored
19
.github/workflows/codeql-analysis.yml
vendored
@@ -55,11 +55,22 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
|
||||
100
.github/workflows/main.yml
vendored
100
.github/workflows/main.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
@@ -14,6 +16,8 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -30,18 +34,55 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Dependencies
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-all-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-all-
|
||||
|
||||
- name: Run check-all
|
||||
run: |
|
||||
make install-golangci-lint
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
build:
|
||||
needs: lint
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-crossbuild-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-crossbuild-
|
||||
|
||||
- name: Build
|
||||
run: make crossbuild
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
@@ -54,43 +95,26 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
run: make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
5
.github/workflows/sync-docs.yml
vendored
5
.github/workflows/sync-docs.yml
vendored
@@ -7,7 +7,8 @@ on:
|
||||
- 'docs/**'
|
||||
workflow_dispatch: {}
|
||||
env:
|
||||
PAGEFIND_VERSION: "1.0.3"
|
||||
PAGEFIND_VERSION: "1.0.4"
|
||||
HUGO_VERSION: "latest"
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout and to commit back image update
|
||||
deployments: write
|
||||
@@ -28,7 +29,7 @@ jobs:
|
||||
path: docs
|
||||
- uses: peaceiris/actions-hugo@v2
|
||||
with:
|
||||
hugo-version: 'latest'
|
||||
hugo-version: ${{env.HUGO_VERSION}}
|
||||
extended: true
|
||||
- name: Install PageFind #install the static search engine for index build
|
||||
uses: supplypike/setup-bin@v3
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2024 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
140
Makefile
140
Makefile
@@ -1,5 +1,7 @@
|
||||
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
MAKE_CONCURRENCY ?= $(shell cat /proc/cpuinfo | grep -c processor)
|
||||
MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
@@ -15,6 +17,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
include app/*/Makefile
|
||||
include docs/Makefile
|
||||
include deployment/*/Makefile
|
||||
include dashboards/Makefile
|
||||
include snap/local/Makefile
|
||||
@@ -25,150 +28,152 @@ all: \
|
||||
victoria-logs-prod \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmalert-tool-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod \
|
||||
vmctl-prod \
|
||||
vmalert-tool-prod
|
||||
vmctl-prod
|
||||
|
||||
clean:
|
||||
rm -rf bin/*
|
||||
|
||||
publish: package-base \
|
||||
publish: \
|
||||
publish-victoria-metrics \
|
||||
publish-vmagent \
|
||||
publish-vmalert \
|
||||
publish-vmalert-tool \
|
||||
publish-vmauth \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore \
|
||||
publish-vmctl \
|
||||
publish-vmalert-tool
|
||||
publish-vmctl
|
||||
|
||||
package: \
|
||||
package-victoria-metrics \
|
||||
package-victoria-logs \
|
||||
package-vmagent \
|
||||
package-vmalert \
|
||||
package-vmalert-tool \
|
||||
package-vmauth \
|
||||
package-vmbackup \
|
||||
package-vmrestore \
|
||||
package-vmctl \
|
||||
package-vmalert-tool
|
||||
package-vmctl
|
||||
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmalert-tool \
|
||||
vmauth \
|
||||
vmbackup \
|
||||
vmrestore \
|
||||
vmctl \
|
||||
vmalert-tool
|
||||
vmctl
|
||||
|
||||
vmutils-pure: \
|
||||
vmagent-pure \
|
||||
vmalert-pure \
|
||||
vmalert-tool-pure \
|
||||
vmauth-pure \
|
||||
vmbackup-pure \
|
||||
vmrestore-pure \
|
||||
vmctl-pure \
|
||||
vmalert-tool-pure
|
||||
vmctl-pure
|
||||
|
||||
vmutils-linux-amd64: \
|
||||
vmagent-linux-amd64 \
|
||||
vmalert-linux-amd64 \
|
||||
vmalert-tool-linux-amd64 \
|
||||
vmauth-linux-amd64 \
|
||||
vmbackup-linux-amd64 \
|
||||
vmrestore-linux-amd64 \
|
||||
vmctl-linux-amd64 \
|
||||
vmalert-tool-linux-amd64
|
||||
vmctl-linux-amd64
|
||||
|
||||
vmutils-linux-arm64: \
|
||||
vmagent-linux-arm64 \
|
||||
vmalert-linux-arm64 \
|
||||
vmalert-tool-linux-arm64 \
|
||||
vmauth-linux-arm64 \
|
||||
vmbackup-linux-arm64 \
|
||||
vmrestore-linux-arm64 \
|
||||
vmctl-linux-arm64 \
|
||||
vmalert-tool-linux-arm64
|
||||
vmctl-linux-arm64
|
||||
|
||||
vmutils-linux-arm: \
|
||||
vmagent-linux-arm \
|
||||
vmalert-linux-arm \
|
||||
vmalert-tool-linux-arm \
|
||||
vmauth-linux-arm \
|
||||
vmbackup-linux-arm \
|
||||
vmrestore-linux-arm \
|
||||
vmctl-linux-arm \
|
||||
vmalert-tool-linux-arm
|
||||
vmctl-linux-arm
|
||||
|
||||
vmutils-linux-386: \
|
||||
vmagent-linux-386 \
|
||||
vmalert-linux-386 \
|
||||
vmalert-tool-linux-386 \
|
||||
vmauth-linux-386 \
|
||||
vmbackup-linux-386 \
|
||||
vmrestore-linux-386 \
|
||||
vmctl-linux-386 \
|
||||
vmalert-tool-linux-386
|
||||
vmctl-linux-386
|
||||
|
||||
vmutils-linux-ppc64le: \
|
||||
vmagent-linux-ppc64le \
|
||||
vmalert-linux-ppc64le \
|
||||
vmalert-tool-linux-ppc64le \
|
||||
vmauth-linux-ppc64le \
|
||||
vmbackup-linux-ppc64le \
|
||||
vmrestore-linux-ppc64le \
|
||||
vmctl-linux-ppc64le \
|
||||
vmalert-tool-linux-ppc64le
|
||||
vmctl-linux-ppc64le
|
||||
|
||||
vmutils-darwin-amd64: \
|
||||
vmagent-darwin-amd64 \
|
||||
vmalert-darwin-amd64 \
|
||||
vmalert-tool-darwin-amd64 \
|
||||
vmauth-darwin-amd64 \
|
||||
vmbackup-darwin-amd64 \
|
||||
vmrestore-darwin-amd64 \
|
||||
vmctl-darwin-amd64 \
|
||||
vmalert-tool-darwin-amd64
|
||||
vmctl-darwin-amd64
|
||||
|
||||
vmutils-darwin-arm64: \
|
||||
vmagent-darwin-arm64 \
|
||||
vmalert-darwin-arm64 \
|
||||
vmalert-tool-darwin-arm64 \
|
||||
vmauth-darwin-arm64 \
|
||||
vmbackup-darwin-arm64 \
|
||||
vmrestore-darwin-arm64 \
|
||||
vmctl-darwin-arm64 \
|
||||
vmalert-tool-darwin-arm64
|
||||
vmctl-darwin-arm64
|
||||
|
||||
vmutils-freebsd-amd64: \
|
||||
vmagent-freebsd-amd64 \
|
||||
vmalert-freebsd-amd64 \
|
||||
vmalert-tool-freebsd-amd64 \
|
||||
vmauth-freebsd-amd64 \
|
||||
vmbackup-freebsd-amd64 \
|
||||
vmrestore-freebsd-amd64 \
|
||||
vmctl-freebsd-amd64 \
|
||||
vmalert-tool-freebsd-amd64
|
||||
vmctl-freebsd-amd64
|
||||
|
||||
vmutils-openbsd-amd64: \
|
||||
vmagent-openbsd-amd64 \
|
||||
vmalert-openbsd-amd64 \
|
||||
vmalert-tool-openbsd-amd64 \
|
||||
vmauth-openbsd-amd64 \
|
||||
vmbackup-openbsd-amd64 \
|
||||
vmrestore-openbsd-amd64 \
|
||||
vmctl-openbsd-amd64 \
|
||||
vmalert-tool-openbsd-amd64
|
||||
vmctl-openbsd-amd64
|
||||
|
||||
vmutils-windows-amd64: \
|
||||
vmagent-windows-amd64 \
|
||||
vmalert-windows-amd64 \
|
||||
vmalert-tool-windows-amd64 \
|
||||
vmauth-windows-amd64 \
|
||||
vmbackup-windows-amd64 \
|
||||
vmrestore-windows-amd64 \
|
||||
vmctl-windows-amd64 \
|
||||
vmalert-tool-windows-amd64
|
||||
vmctl-windows-amd64
|
||||
|
||||
crossbuild:
|
||||
$(MAKE_PARALLEL) victoria-metrics-crossbuild vmutils-crossbuild
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-amd64 \
|
||||
victoria-metrics-linux-arm64 \
|
||||
victoria-metrics-linux-arm \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-ppc64le \
|
||||
victoria-metrics-darwin-amd64 \
|
||||
victoria-metrics-darwin-arm64 \
|
||||
@@ -180,7 +185,6 @@ vmutils-crossbuild: \
|
||||
vmutils-linux-amd64 \
|
||||
vmutils-linux-arm64 \
|
||||
vmutils-linux-arm \
|
||||
vmutils-linux-386 \
|
||||
vmutils-linux-ppc64le \
|
||||
vmutils-darwin-amd64 \
|
||||
vmutils-darwin-arm64 \
|
||||
@@ -190,14 +194,15 @@ vmutils-crossbuild: \
|
||||
|
||||
publish-release:
|
||||
rm -rf bin/*
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
git checkout $(TAG) && $(MAKE) release && LATEST_TAG=stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release && LATEST_TAG=cluster-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release && LATEST_TAG=enterprise-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release && LATEST_TAG=enterprise-cluster-stable $(MAKE) publish
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
release:
|
||||
$(MAKE_PARALLEL) \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-386 \
|
||||
@@ -256,16 +261,16 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
|
||||
cd bin && rm -rf \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe
|
||||
|
||||
release-victoria-logs: \
|
||||
release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
release-victoria-logs:
|
||||
$(MAKE_PARALLEL) release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
|
||||
release-victoria-logs-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch
|
||||
@@ -354,72 +359,72 @@ release-vmutils-windows-amd64:
|
||||
release-vmutils-goos-goarch: \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
||||
|
||||
release-vmutils-windows-goarch: \
|
||||
vmagent-windows-$(GOARCH)-prod \
|
||||
vmalert-windows-$(GOARCH)-prod \
|
||||
vmalert-tool-windows-$(GOARCH)-prod \
|
||||
vmauth-windows-$(GOARCH)-prod \
|
||||
vmbackup-windows-$(GOARCH)-prod \
|
||||
vmrestore-windows-$(GOARCH)-prod \
|
||||
vmctl-windows-$(GOARCH)-prod \
|
||||
vmalert-tool-windows-$(GOARCH)-prod
|
||||
vmctl-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe
|
||||
vmctl-windows-$(GOARCH)-prod.exe
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
@@ -486,7 +491,7 @@ golangci-lint: install-golangci-lint
|
||||
golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.54.2
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.55.1
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
@@ -529,12 +534,3 @@ copy-docs:
|
||||
docs-sync:
|
||||
SRC=README.md DST=docs/README.md OLD_URL='' ORDER=0 TITLE=VictoriaMetrics $(MAKE) copy-docs
|
||||
SRC=README.md DST=docs/Single-server-VictoriaMetrics.md OLD_URL='/Single-server-VictoriaMetrics.html' TITLE=VictoriaMetrics ORDER=1 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md OLD_URL='/vmagent.html' ORDER=3 TITLE=vmagent $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md OLD_URL='/vmalert.html' ORDER=4 TITLE=vmalert $(MAKE) copy-docs
|
||||
SRC=app/vmauth/README.md DST=docs/vmauth.md OLD_URL='/vmauth.html' ORDER=5 TITLE=vmauth $(MAKE) copy-docs
|
||||
SRC=app/vmbackup/README.md DST=docs/vmbackup.md OLD_URL='/vmbackup.html' ORDER=6 TITLE=vmbackup $(MAKE) copy-docs
|
||||
SRC=app/vmrestore/README.md DST=docs/vmrestore.md OLD_URL='/vmrestore.html' ORDER=7 TITLE=vmrestore $(MAKE) copy-docs
|
||||
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
|
||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
|
||||
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
|
||||
SRC=app/vmalert-tool/README.md DST=docs/vmalert-tool.md OLD_URL='' ORDER=12 TITLE=vmalert-tool $(MAKE) copy-docs
|
||||
|
||||
289
README.md
289
README.md
@@ -9,7 +9,7 @@
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
|
||||
[](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
|
||||
<img src="logo.png" width="300" alt="VictoriaMetrics logo">
|
||||
<img src="docs/logo.webp" width="300" alt="VictoriaMetrics logo">
|
||||
|
||||
VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database.
|
||||
|
||||
@@ -22,17 +22,17 @@ The cluster version of VictoriaMetrics is available [here](https://docs.victoria
|
||||
Learn more about [key concepts](https://docs.victoriametrics.com/keyConcepts.html) of VictoriaMetrics and follow the
|
||||
[quick start guide](https://docs.victoriametrics.com/Quick-Start.html) for a better experience.
|
||||
|
||||
There is also user-friendly database for logs - [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/).
|
||||
There is also a user-friendly database for logs - [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/).
|
||||
|
||||
If you have questions about VictoriaMetrics, then feel free asking them at [VictoriaMetrics community Slack chat](https://slack.victoriametrics.com/).
|
||||
If you have questions about VictoriaMetrics, then feel free asking them in the [VictoriaMetrics community Slack chat](https://slack.victoriametrics.com/).
|
||||
|
||||
[Contact us](mailto:info@victoriametrics.com) if you need enterprise support for VictoriaMetrics.
|
||||
See [features available in enterprise package](https://docs.victoriametrics.com/enterprise.html).
|
||||
Enterprise binaries can be downloaded and evaluated for free
|
||||
from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
|
||||
You can also [request a free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) and performing [regular upgrades](#how-to-upgrade-victoriametrics).
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended to check the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) periodically, and to perform [regular upgrades](#how-to-upgrade-victoriametrics).
|
||||
|
||||
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
@@ -41,19 +41,19 @@ VictoriaMetrics has achieved security certifications for Database Software Devel
|
||||
VictoriaMetrics has the following prominent features:
|
||||
|
||||
* It can be used as long-term storage for Prometheus. See [these docs](#prometheus-setup) for details.
|
||||
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
|
||||
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
|
||||
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports the [Prometheus querying API](#prometheus-querying-api-usage).
|
||||
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports the [Graphite API](#graphite-api-usage).
|
||||
VictoriaMetrics allows reducing infrastructure costs by more than 10x comparing to Graphite - see [this case study](https://docs.victoriametrics.com/CaseStudies.html#grammarly).
|
||||
* It is easy to setup and operate:
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
|
||||
without external dependencies.
|
||||
* All the configuration is done via explicit command-line flags with reasonable defaults.
|
||||
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
|
||||
* All the data is stored in a single directory specified by the `-storageDataPath` command-line flag.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
|
||||
* It implements a PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* It provides a global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
|
||||
* It provides high performance and good vertical and horizontal scalability for both
|
||||
[data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
|
||||
and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
|
||||
@@ -62,9 +62,9 @@ VictoriaMetrics has the following prominent features:
|
||||
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
|
||||
when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
|
||||
* It provides high data compression, so up to 70x more data points may be stored into limited storage comparing to TimescaleDB
|
||||
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
|
||||
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex
|
||||
* It provides high data compression: up to 70x more data points may be stored into limited storage compared with TimescaleDB
|
||||
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4),
|
||||
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex.
|
||||
according to [this benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
|
||||
See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
@@ -75,7 +75,7 @@ VictoriaMetrics has the following prominent features:
|
||||
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to
|
||||
[the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
|
||||
* It supports metrics scraping, ingestion and [backfilling](#backfilling) via the following protocols:
|
||||
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* [Prometheus remote write API](#prometheus-setup).
|
||||
* [Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format).
|
||||
@@ -87,6 +87,7 @@ VictoriaMetrics has the following prominent features:
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* [NewRelic infrastructure agent](#how-to-send-data-from-newrelic-agent).
|
||||
* [OpenTelemetry metrics format](#sending-data-via-opentelemetry).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
@@ -94,7 +95,7 @@ VictoriaMetrics has the following prominent features:
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data
|
||||
and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* It has an open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/)
|
||||
and [Google Filestore](https://cloud.google.com/filestore).
|
||||
|
||||
@@ -137,7 +138,7 @@ See also [articles and slides about VictoriaMetrics from our users](https://docs
|
||||
|
||||
### Install
|
||||
|
||||
To quickly try VictoriaMetrics, just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
To quickly try VictoriaMetrics, just download the [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
|
||||
See also [QuickStart guide](https://docs.victoriametrics.com/Quick-Start.html) for additional information.
|
||||
|
||||
@@ -154,10 +155,10 @@ VictoriaMetrics can also be installed via these installation methods:
|
||||
|
||||
The following command-line flags are used the most:
|
||||
|
||||
* `-storageDataPath` - VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
|
||||
* `-storageDataPath` - VictoriaMetrics stores all the data in this directory. The default path is `victoria-metrics-data` in the current working directory.
|
||||
* `-retentionPeriod` - retention for stored data. Older data is automatically deleted. Default retention is 1 month (31 days). The minimum retention period is 24h or 1d. See [these docs](#retention) for more details.
|
||||
|
||||
Other flags have good enough default values, so set them only if you really need this. Pass `-help` to see [all the available flags with description and default values](#list-of-command-line-flags).
|
||||
Other flags have good enough default values, so set them only if you really need to. Pass `-help` to see [all the available flags with description and default values](#list-of-command-line-flags).
|
||||
|
||||
The following docs may be useful during initial VictoriaMetrics setup:
|
||||
* [How to set up scraping of Prometheus-compatible targets](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
|
||||
@@ -171,9 +172,6 @@ VictoriaMetrics accepts [Prometheus querying API requests](#prometheus-querying-
|
||||
|
||||
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) and performing [regular upgrades](#how-to-upgrade-victoriametrics).
|
||||
|
||||
|
||||
### Environment variables
|
||||
|
||||
All the VictoriaMetrics components allow referring environment variables in `yaml` configuration files (such as `-promscrape.config`)
|
||||
@@ -338,7 +336,8 @@ which can be used as faster and less resource-hungry alternative to Prometheus.
|
||||
|
||||
## Grafana setup
|
||||
|
||||
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following url:
|
||||
Create [Prometheus datasource](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/)
|
||||
in Grafana with the following url:
|
||||
|
||||
```url
|
||||
http://<victoriametrics-addr>:8428
|
||||
@@ -346,12 +345,23 @@ http://<victoriametrics-addr>:8428
|
||||
|
||||
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
|
||||
|
||||
In the "Type and version" section it is recommended to set the type to "Prometheus" and the version to at least "2.24.x":
|
||||
|
||||
<img src="docs/grafana-datasource-prometheus.webp" alt="Grafana datasource" />
|
||||
|
||||
This allows Grafana to use a more efficient API to get label values.
|
||||
|
||||
Then build graphs and dashboards for the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
|
||||
or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
|
||||
|
||||
Alternatively, use VictoriaMetrics [datasource plugin](https://github.com/VictoriaMetrics/grafana-datasource) with support of extra features.
|
||||
See more in [description](https://github.com/VictoriaMetrics/grafana-datasource#victoriametrics-data-source-for-grafana).
|
||||
|
||||
Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
|
||||
If you don't see an option to create a data source - try contacting system administrator.
|
||||
|
||||
Grafana playground is available for viewing at our [sandbox](https://play-grafana.victoriametrics.com).
|
||||
|
||||
## How to upgrade VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking [the CHANGELOG page](https://docs.victoriametrics.com/CHANGELOG.html) and performing regular upgrades.
|
||||
@@ -385,6 +395,9 @@ The UI allows exploring query results via graphs and tables. It also provides th
|
||||
- [WITH expressions playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/expand-with-exprs) - test how WITH expressions work;
|
||||
- [Metric relabel debugger](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/relabeling) - playground for [relabeling](#relabeling) configs.
|
||||
|
||||
VMUI provides auto-completion for [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) functions, metric names, label names and label values. The auto-completion can be enabled
|
||||
by checking the `Autocomplete` toggle. When the auto-completion is disabled, it can still be triggered for the current cursor position by pressing `ctrl+space`.
|
||||
|
||||
VMUI automatically switches from graph view to heatmap view when the query returns [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) buckets
|
||||
(both [Prometheus histograms](https://prometheus.io/docs/concepts/metric_types/#histogram)
|
||||
and [VictoriaMetrics histograms](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) are supported).
|
||||
@@ -442,7 +455,7 @@ This information is obtained from the `/api/v1/status/active_queries` HTTP endpo
|
||||
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
|
||||
|
||||
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
|
||||
1. Click the `Explore metrics` tab.
|
||||
1. Click the `Explore Prometheus metrics` tab.
|
||||
1. Select the `job` you want to explore.
|
||||
1. Optionally select the `instance` for the selected job to explore.
|
||||
1. Select metrics you want to explore and compare.
|
||||
@@ -502,10 +515,8 @@ See also [vmagent](https://docs.victoriametrics.com/vmagent.html), which can be
|
||||
|
||||
## How to send data from DataDog agent
|
||||
|
||||
VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/)
|
||||
or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/)
|
||||
via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics)
|
||||
at `/datadog/api/v1/series` path.
|
||||
VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/) or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/)
|
||||
via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics) at `/datadog/api/v2/series` path.
|
||||
|
||||
### Sending metrics to VictoriaMetrics
|
||||
|
||||
@@ -513,10 +524,11 @@ DataDog agent allows configuring destinations for metrics sending via ENV variab
|
||||
or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.
|
||||
|
||||
<p align="center">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.webp" width="800">
|
||||
</p>
|
||||
|
||||
To configure DataDog agent via ENV variable add the following prefix:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```
|
||||
@@ -531,14 +543,12 @@ To configure DataDog agent via [configuration file](https://github.com/DataDog/d
|
||||
add the following line:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```
|
||||
dd_url: http://victoriametrics:8428/datadog
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
vmagent also can accept Datadog metrics format. Depending on where vmagent will forward data,
|
||||
[vmagent](https://docs.victoriametrics.com/vmagent.html) also can accept Datadog metrics format. Depending on where vmagent will forward data,
|
||||
pick [single-node or cluster URL](https://docs.victoriametrics.com/url-examples.html#datadog) formats.
|
||||
|
||||
### Sending metrics to Datadog and VictoriaMetrics
|
||||
@@ -547,7 +557,7 @@ DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guid
|
||||
sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.
|
||||
|
||||
<p align="center">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.webp" width="800">
|
||||
</p>
|
||||
|
||||
Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
|
||||
@@ -579,8 +589,7 @@ additional_endpoints:
|
||||
|
||||
### Send via cURL
|
||||
|
||||
See how to send data to VictoriaMetrics via
|
||||
[DataDog "submit metrics"](https://docs.victoriametrics.com/url-examples.html#datadogapiv1series) from command line.
|
||||
See how to send data to VictoriaMetrics via DataDog "submit metrics" API [here](https://docs.victoriametrics.com/url-examples.html#datadogapiv2series).
|
||||
|
||||
The imported data can be read via [export API](https://docs.victoriametrics.com/url-examples.html#apiv1export).
|
||||
|
||||
@@ -591,7 +600,7 @@ according to [DataDog metric naming recommendations](https://docs.datadoghq.com/
|
||||
If you need accepting metric names as is without sanitizing, then pass `-datadog.sanitizeMetricName=false` command-line flag to VictoriaMetrics.
|
||||
|
||||
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
|
||||
For example, `/datadog/api/v1/series?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
For example, `/datadog/api/v2/series?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
|
||||
DataDog agent sends the [configured tags](https://docs.datadoghq.com/getting_started/tagging/) to
|
||||
undocumented endpoint - `/datadog/intake`. This endpoint isn't supported by VictoriaMetrics yet.
|
||||
@@ -1126,6 +1135,18 @@ For example, the following command builds the image on top of [scratch](https://
|
||||
ROOT_IMAGE=scratch make package-victoria-metrics
|
||||
```
|
||||
|
||||
#### Building VictoriaMetrics with Podman
|
||||
|
||||
VictoriaMetrics can be built with Podman in either rootful or rootless mode.
|
||||
|
||||
When building via rootlful Podman, simply add `DOCKER=podman` to the relevant `make` commandline. To build
|
||||
via rootless Podman, add `DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"` to the `make`
|
||||
commandline.
|
||||
|
||||
For example: `make victoria-metrics-pure DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"`
|
||||
|
||||
Note that `production` builds are not supported via Podman becuase Podman does not support `buildx`.
|
||||
|
||||
## Start with docker-compose
|
||||
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
|
||||
@@ -1152,6 +1173,15 @@ Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-s
|
||||
is the command-line flag value. Snapshots can be archived to backup storage at any time
|
||||
with [vmbackup](https://docs.victoriametrics.com/vmbackup.html).
|
||||
|
||||
Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`.
|
||||
See [this article](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
for more details. This adds some restrictions on what can be done with the contents of `<-storageDataPath>/snapshots` directory:
|
||||
|
||||
- Do not delete subdirectories inside `<-storageDataPath>/snapshots` with `rm` or similar commands, since this will leave some snapshot data undeleted.
|
||||
Prefer using the `/snapshot/delete` API for deleting snapshot. See below for more details about this API.
|
||||
- Do not copy subdirectories inside `<-storageDataPath>/snapshot` with `cp`, `rsync` or similar commands, since there are high chances
|
||||
that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data.
|
||||
|
||||
The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots.
|
||||
|
||||
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
|
||||
@@ -1182,6 +1212,7 @@ before actually deleting the metrics. By default, this query will only scan seri
|
||||
adjust `start` and `end` to a suitable range to achieve match hits.
|
||||
|
||||
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
|
||||
Note that handler accepts any HTTP method, so sending a `GET` request to `/api/v1/admin/tsdb/delete_series` will result in deletion of time series.
|
||||
|
||||
The delete API is intended mainly for the following cases:
|
||||
|
||||
@@ -1388,7 +1419,12 @@ For example, `/api/v1/import?extra_label=foo=bar` would add `"foo":"bar"` label
|
||||
|
||||
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
|
||||
|
||||
VictoriaMetrics parses input JSON lines one-by-one. It loads the whole JSON line in memory, then parses it and then saves the parsed samples into persistent storage. This means that VictoriaMetrics can occupy big amounts of RAM when importing too long JSON lines. The solution is to split too long JSON lines into smaller lines. It is OK if samples for a single time series are split among multiple JSON lines.
|
||||
VictoriaMetrics parses input JSON lines one-by-one. It loads the whole JSON line in memory, then parses it and then saves the parsed samples into persistent storage.
|
||||
This means that VictoriaMetrics can occupy big amounts of RAM when importing too long JSON lines.
|
||||
The solution is to split too long JSON lines into shorter lines. It is OK if samples for a single time series are split among multiple JSON lines.
|
||||
JSON line length can be limited via `max_rows_per_line` query arg when exporting via [/api/v1/export](#how-to-export-data-in-json-line-format).
|
||||
|
||||
The maximum JSON line length, which can be parsed by VictoriaMetrics, is limited by `-import.maxLineLen` command-line flag value.
|
||||
|
||||
### How to import data in native format
|
||||
|
||||
@@ -1565,15 +1601,18 @@ The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line
|
||||
```
|
||||
|
||||
Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it.
|
||||
Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB).
|
||||
[/api/v1/import](#how-to-import-data-in-json-line-format) handler doesn't accept JSON lines longer than the value
|
||||
passed to `-import.maxLineLen` command-line flag (by default this is 10MB).
|
||||
|
||||
It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format).
|
||||
Too long JSON lines may increase RAM usage at VictoriaMetrics side.
|
||||
|
||||
[/api/v1/export](#how-to-export-data-in-json-line-format) handler accepts `max_rows_per_line` query arg, which allows limiting the number of samples per each exported line.
|
||||
|
||||
It is OK to split [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
|
||||
for the same [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) across multiple lines.
|
||||
|
||||
The number of lines in JSON line document can be arbitrary.
|
||||
The number of lines in the request to [/api/v1/import](#how-to-import-data-in-json-line-format) can be arbitrary - they are imported in streaming manner.
|
||||
|
||||
## Relabeling
|
||||
|
||||
@@ -1661,6 +1700,8 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical
|
||||
- `-search.maxConcurrentRequests` limits the number of concurrent requests VictoriaMetrics can process. Bigger number of concurrent requests usually means bigger memory usage. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` of additional memory. So it is better to limit the number of concurrent queries, while suspending additional incoming queries if the concurrency limit is reached. VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for suspended queries. See also `-search.maxMemoryPerQuery` command-line flag.
|
||||
- `-search.maxSamplesPerSeries` limits the number of raw samples the query can process per each time series. VictoriaMetrics sequentially processes raw samples per each found time series during the query. It unpacks raw samples on the selected time range per each time series into memory and then applies the given [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). The `-search.maxSamplesPerSeries` command-line flag allows limiting memory usage in the case when the query is executed on a time range, which contains hundreds of millions of raw samples per each located time series.
|
||||
- `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
|
||||
- `-search.maxResponseSeries` limits the number of time series a single query can return from [`/api/v1/query`](https://docs.victoriametrics.com/keyConcepts.html#instant-query)
|
||||
and [`/api/v1/query_range`](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
- `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
- `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
|
||||
- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
|
||||
@@ -1669,48 +1710,51 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical
|
||||
- `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
|
||||
- `-search.maxTagValueSuffixesPerSearch` limits the number of entries, which may be returned from `/metrics/find` endpoint. See [Graphite Metrics API usage docs](#graphite-metrics-api-usage).
|
||||
|
||||
See also [cardinality limiter](#cardinality-limiter) and [capacity planning docs](#capacity-planning).
|
||||
See also [resource usage limits at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#resource-usage-limits),
|
||||
[cardinality limiter](#cardinality-limiter) and [capacity planning docs](#capacity-planning).
|
||||
|
||||
|
||||
## High availability
|
||||
|
||||
* Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
|
||||
* Pass addresses of these instances to [vmagent](https://docs.victoriametrics.com/vmagent.html) via `-remoteWrite.url` command-line flag:
|
||||
The general approach for achieving high availability is the following:
|
||||
|
||||
- To run two identically configured VictoriaMetrics instances in distinct datacenters (availability zones);
|
||||
- To store the collected data simultaneously into these instances via [vmagent](https://docs.victoriametrics.com/vmagent.html) or Prometheus.
|
||||
- To query the first VictoriaMetrics instance and to fail over to the second instance when the first instance becomes temporarily unavailable.
|
||||
This can be done via [vmauth](https://docs.victoriametrics.com/vmauth.html) according to [these docs](https://docs.victoriametrics.com/vmauth.html#high-availability).
|
||||
|
||||
Such a setup guarantees that the collected data isn't lost when one of VictoriaMetrics instance becomes unavailable.
|
||||
The collected data continues to be written to the available VictoriaMetrics instance, so it should be available for querying.
|
||||
Both [vmagent](https://docs.victoriametrics.com/vmagent.html) and Prometheus buffer the collected data locally if they cannot send it
|
||||
to the configured remote storage. So the collected data will be written to the temporarily unavailable VictoriaMetrics instance
|
||||
after it becomes available.
|
||||
|
||||
If you use [vmagent](https://docs.victoriametrics.com/vmagent.html) for storing the data into VictoriaMetrics,
|
||||
then it can be configured with multiple `-remoteWrite.url` command-line flags, where every flag points to the VictoriaMetrics
|
||||
instance in a particular availability zone, in order to replicate the collected data to all the VictoriaMetrics instances.
|
||||
For example, the following command instructs `vmagent` to replicate data to `vm-az1` and `vm-az2` instances of VictoriaMetrics:
|
||||
|
||||
```console
|
||||
/path/to/vmagent -remoteWrite.url=http://<victoriametrics-addr-1>:8428/api/v1/write -remoteWrite.url=http://<victoriametrics-addr-2>:8428/api/v1/write
|
||||
/path/to/vmagent \
|
||||
-remoteWrite.url=http://<vm-az1>:8428/api/v1/write \
|
||||
-remoteWrite.url=http://<vm-az2>:8428/api/v1/write
|
||||
```
|
||||
|
||||
Alternatively these addresses may be passed to `remote_write` section in Prometheus config:
|
||||
If you use Prometheus for collecting and writing the data to VictoriaMetrics,
|
||||
then the following [`remote_write`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section
|
||||
in Prometheus config can be used for replicating the collected data to `vm-az1` and `vm-az2` VictoriaMetrics instances:
|
||||
|
||||
```yml
|
||||
remote_write:
|
||||
- url: http://<victoriametrics-addr-1>:8428/api/v1/write
|
||||
queue_config:
|
||||
max_samples_per_send: 10000
|
||||
# ...
|
||||
- url: http://<victoriametrics-addr-N>:8428/api/v1/write
|
||||
queue_config:
|
||||
max_samples_per_send: 10000
|
||||
- url: http://<vm-az1>:8428/api/v1/write
|
||||
- url: http://<vm-az2>:8428/api/v1/write
|
||||
```
|
||||
|
||||
* Apply the updated config:
|
||||
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups,
|
||||
since it uses lower amounts of RAM, CPU and network bandwidth than Prometheus.
|
||||
|
||||
```console
|
||||
kill -HUP `pidof prometheus`
|
||||
```
|
||||
|
||||
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups.
|
||||
|
||||
* Now Prometheus should write data into all the configured `remote_write` urls in parallel.
|
||||
* Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
|
||||
* Set up Prometheus datasource in Grafana that points to Promxy.
|
||||
|
||||
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
|
||||
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
|
||||
|
||||
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
|
||||
with the enabled de-duplication. See [this section](#deduplication) for details.
|
||||
If you use identically configured [vmagent](https://docs.victoriametrics.com/vmagent.html) instances for collecting the same data
|
||||
and sending it to VictoriaMetrics, then do not forget enabling [deduplication](#deduplication) at VictoriaMetrics side.
|
||||
|
||||
## Deduplication
|
||||
|
||||
@@ -1724,6 +1768,10 @@ This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/
|
||||
If multiple raw samples have **the same timestamp** on the given `-dedup.minScrapeInterval` discrete interval,
|
||||
then the sample with **the biggest value** is kept.
|
||||
|
||||
[Prometheus stalenes markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are processed as any other value during de-duplication.
|
||||
If raw sample with the biggest timestamp on `-dedup.minScrapeInterval` contains a stale marker, then it is kept after the deduplication.
|
||||
This allows properly preserving staleness markers during the de-duplication.
|
||||
|
||||
Please note, [labels](https://docs.victoriametrics.com/keyConcepts.html#labels) of raw samples should be identical
|
||||
in order to be deduplicated. For example, this is why [HA pair of vmagents](https://docs.victoriametrics.com/vmagent.html#high-availability)
|
||||
needs to be identically configured.
|
||||
@@ -1807,7 +1855,7 @@ This increases overhead during data querying, since VictoriaMetrics needs to rea
|
||||
bigger number of parts per each request. That's why it is recommended to have at least 20%
|
||||
of free disk space under directory pointed by `-storageDataPath` command-line flag.
|
||||
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/).
|
||||
See more details in [monitoring docs](#monitoring).
|
||||
|
||||
@@ -1897,8 +1945,23 @@ See how to request a free trial license [here](https://victoriametrics.com/produ
|
||||
|
||||
* `-downsampling.period=30d:5m,180d:1h` instructs VictoriaMetrics to deduplicate samples older than 30 days with 5 minutes interval and to deduplicate samples older than 180 days with 1 hour interval.
|
||||
|
||||
Downsampling is applied independently per each time series. It can reduce disk space usage and improve query performance if it is applied to time series with big number of samples per each series. The downsampling doesn't improve query performance if the database contains big number of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)), since downsampling doesn't reduce the number of time series. So the majority of time is spent on searching for the matching time series.
|
||||
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in vmagent or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to [reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
|
||||
Downsampling is applied independently per each time series and leaves a single [raw sample](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
|
||||
with the biggest [timestamp](https://en.wikipedia.org/wiki/Unix_time) on the configured interval, in the same way as [deduplication](#deduplication) does.
|
||||
It works the best for [counters](https://docs.victoriametrics.com/keyConcepts.html#counter) and [histograms](https://docs.victoriametrics.com/keyConcepts.html#histogram),
|
||||
as their values are always increasing. But downsampling [gauges](https://docs.victoriametrics.com/keyConcepts.html#gauge)
|
||||
and [summaries](https://docs.victoriametrics.com/keyConcepts.html#summary)
|
||||
would mean losing the changes within the downsampling interval. Please note, you can use [recording rules](https://docs.victoriametrics.com/vmalert.html#rules)
|
||||
or [steaming aggregation](https://docs.victoriametrics.com/stream-aggregation.html)
|
||||
to apply custom aggregation functions, like min/max/avg etc., in order to make gauges more resilient to downsampling.
|
||||
|
||||
Downsampling can reduce disk space usage and improve query performance if it is applied to time series with big number
|
||||
of samples per each series. The downsampling doesn't improve query performance if the database contains big number
|
||||
of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)),
|
||||
since downsampling doesn't reduce the number of time series. In this case the majority of query time is spent on searching for the matching time series
|
||||
instead of processing the found samples.
|
||||
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in [vmagent](https://docs.victoriametrics.com/vmagent.html)
|
||||
or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to
|
||||
[reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
|
||||
|
||||
Downsampling happens during [background merges](https://docs.victoriametrics.com/#storage)
|
||||
and can't be performed if there is not enough of free disk space or if vmstorage
|
||||
@@ -1956,6 +2019,8 @@ VictoriaMetrics provides the following security-related command-line flags:
|
||||
* `-flagsAuthKey` for protecting `/flags` endpoint.
|
||||
* `-pprofAuthKey` for protecting `/debug/pprof/*` endpoints, which can be used for [profiling](#profiling).
|
||||
* `-denyQueryTracing` for disallowing [query tracing](#query-tracing).
|
||||
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
|
||||
and `X-Frame-Options` HTTP response headers.
|
||||
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
@@ -1991,7 +2056,9 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
|
||||
set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
|
||||
with 10 seconds interval.
|
||||
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
_Please note, never use loadbalancer address for scraping metrics. All monitored components should be scraped directly by their address._
|
||||
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
|
||||
See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
|
||||
created by community.
|
||||
@@ -2221,11 +2288,14 @@ The following command-line flags are related to pushing metrics from VictoriaMet
|
||||
The `-pushmetrics.url` can be specified multiple times. In this case metrics are pushed to all the specified urls.
|
||||
The url can contain basic auth params in the form `http://user:pass@hostname/api/v1/import/prometheus`.
|
||||
Metrics are pushed to the provided `-pushmetrics.url` in a compressed form with `Content-Encoding: gzip` request header.
|
||||
This allows reducing the required network bandwidth for metrics push.
|
||||
* `-pushmetrics.extraLabel` - labels to add to all the metrics before sending them to `-pushmetrics.url`. Each label must be specified in the format `label="value"`.
|
||||
This allows reducing the required network bandwidth for metrics push. The compression can be disabled by passing `-pushmetrics.disableCompression` command-line flag.
|
||||
* `-pushmetrics.extraLabel` - labels to add to all the metrics before sending them to every `-pushmetrics.url`. Each label must be specified in the format `label="value"`.
|
||||
It is OK to specify multiple `-pushmetrics.extraLabel` command-line flags. In this case all the specified labels
|
||||
are added to all the metrics before sending them to all the configured `-pushmetrics.url` addresses.
|
||||
* `-pushmetrics.interval` - the interval between pushes. By default it is set to 10 seconds.
|
||||
* `-pushmetrics.header` - an optional HTTP header to send to every `-pushmetrics.url`. For example, `-pushmetrics.header='Authorization: Basic foo'` instructs to send
|
||||
`Authorization: Basic foo` HTTP header with every request to every `-pushmetrics.url`. It is possible to set multiple `-pushmetrics.header` command-line flags
|
||||
for sending multiple different HTTP headers to `-pushmetrics.url`.
|
||||
|
||||
For example, the following command instructs VictoriaMetrics to push metrics from `/metrics` page to `https://maas.victoriametrics.com/api/v1/import/prometheus`
|
||||
with `user:pass` [Basic auth](https://en.wikipedia.org/wiki/Basic_access_authentication). The `instance="foobar"` and `job="vm"` labels
|
||||
@@ -2259,7 +2329,7 @@ The following metrics for each type of cache are exported at [`/metrics` page](#
|
||||
* `vm_cache_misses_total` - the number of cache misses
|
||||
* `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
@@ -2317,7 +2387,7 @@ It is recommended disabling query cache with `-search.disableCache` command-line
|
||||
historical data with timestamps from the past, since the cache assumes that the data is written with
|
||||
the current timestamps. Query cache can be enabled after the backfilling is complete.
|
||||
|
||||
An alternative solution is to query [/internal/resetRollupResultCache](https://docs.victoriametrics.com/url-examples.html#internalresetRollupResultCache) handler after the backfilling is complete. This will reset the query cache, which could contain incomplete data cached during the backfilling.
|
||||
An alternative solution is to query [/internal/resetRollupResultCache](https://docs.victoriametrics.com/url-examples.html#internalresetrollupresultcache) handler after the backfilling is complete. This will reset the query cache, which could contain incomplete data cached during the backfilling.
|
||||
|
||||
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
|
||||
for data with timestamps close to the current time. Single-node VictoriaMetrics automatically resets response
|
||||
@@ -2450,6 +2520,20 @@ Adhering `KISS` principle simplifies the resulting code and architecture, so it
|
||||
|
||||
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
|
||||
|
||||
## Images in documentation
|
||||
|
||||
Please, keep image size and number of images per single page low. Keep the docs page as lightweight as possible.
|
||||
|
||||
If the page needs to have many images, consider using WEB-optimized image format [webp](https://developers.google.com/speed/webp).
|
||||
When adding a new doc with many images use `webp` format right away. Or use a Makefile command below to
|
||||
convert already existing images at `docs` folder automatically to `web` format:
|
||||
|
||||
```console
|
||||
make docs-images-to-webp
|
||||
```
|
||||
|
||||
Once conversion is done, update the path to images in your docs and verify everything is correct.
|
||||
|
||||
## VictoriaMetrics Logo
|
||||
|
||||
[Zip](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
|
||||
@@ -2487,6 +2571,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
```
|
||||
-bigMergeConcurrency int
|
||||
Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
|
||||
-blockcache.missesBeforeCaching int
|
||||
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-configAuthKey string
|
||||
@@ -2494,7 +2580,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-csvTrimTimestamp duration
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
The maximum size in bytes of a single DataDog POST request to /datadog/api/v2/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
@@ -2507,7 +2593,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-downsampling.period array
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-dryRun
|
||||
Whether to check config files without running VictoriaMetrics. The following config files are checked: -promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
|
||||
@@ -2541,6 +2627,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -2554,12 +2646,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8428")
|
||||
TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol (default ":8428")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -2608,6 +2700,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 1000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
@@ -2615,7 +2709,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration.
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
@@ -2628,8 +2722,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-newrelic.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
@@ -2657,6 +2756,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
If non-empty, then the label with this name and the -promscrape.cluster.memberNum value is added to all the scraped metrics. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
|
||||
-promscrape.cluster.memberNum string
|
||||
The number of vmagent instance in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name. See also -promscrape.cluster.memberLabel . See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default "0")
|
||||
-promscrape.cluster.memberURLTemplate string
|
||||
An optional template for URL to access vmagent instance with the given -promscrape.cluster.memberNum value. Every %d occurence in the template is substituted with -promscrape.cluster.memberNum at urls to vmagent instances responsible for scraping the given target at /service-discovery page. For example -promscrape.cluster.memberURLTemplate='http://vmagent-%d:8429/targets'. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more details
|
||||
-promscrape.cluster.membersCount int
|
||||
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
|
||||
-promscrape.cluster.name string
|
||||
@@ -2670,7 +2771,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.config.strictParse
|
||||
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
Interval for checking for changes in -promscrape.config file. By default, the checking is disabled. See how to reload -promscrape.config file at https://docs.victoriametrics.com/vmagent.html#configuration-update
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
@@ -2742,22 +2843,27 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The delay for suppressing repeated scrape errors logging per each scrape targets. This may be used for reducing the number of log lines related to scrape errors. See also -promscrape.suppressScrapeErrors
|
||||
-promscrape.yandexcloudSDCheckInterval duration
|
||||
Interval for checking for changes in Yandex Cloud API. This works only if yandexcloud_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#yandexcloud_sd_configs for details (default 30s)
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-relabelConfig string
|
||||
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
|
||||
-retentionFilter array
|
||||
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-retentionPeriod value
|
||||
Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
-retentionTimezoneOffset duration
|
||||
The offset for performing indexdb rotation. If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)
|
||||
-search.cacheTimestampOffset duration
|
||||
@@ -2773,12 +2879,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logQueryMemoryUsage size
|
||||
Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Log query and increment vm_memory_intensive_queries_total metric each time the query requires more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
-search.maxConcurrentRequests int
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 8)
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 16)
|
||||
-search.maxExportDuration duration
|
||||
The maximum duration for /api/v1/export call (default 720h0m0s)
|
||||
-search.maxExportSeries int
|
||||
@@ -2797,7 +2903,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . See also -search.logQueryMemoryUsage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.maxPointsPerTimeseries int
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph. See also -search.maxResponseSeries (default 30000)
|
||||
-search.maxPointsSubqueryPerTimeseries int
|
||||
The maximum number of points per series, which can be generated by subquery. See https://valyala.medium.com/prometheus-subqueries-in-victoriametrics-9b1492b720b3 (default 100000)
|
||||
-search.maxQueryDuration duration
|
||||
@@ -2807,6 +2913,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
-search.maxQueueDuration duration
|
||||
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
|
||||
-search.maxResponseSeries int
|
||||
The maximum number of time series which can be returned from /api/v1/query and /api/v1/query_range . The limit is disabled if it equals to 0. See also -search.maxPointsPerTimeseries and -search.maxUniqueTimeseries
|
||||
-search.maxSamplesPerQuery int
|
||||
The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
|
||||
-search.maxSamplesPerSeries int
|
||||
@@ -2832,9 +2940,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.maxUniqueTimeseries int
|
||||
The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage (default 300000)
|
||||
-search.maxWorkersPerQuery int
|
||||
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
|
||||
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 16)
|
||||
-search.minStalenessInterval duration
|
||||
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
|
||||
-search.minWindowForInstantRollupOptimization value
|
||||
Enable cache-based optimization for repeated queries to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 3h)
|
||||
-search.noStaleMarkers
|
||||
Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
|
||||
-search.queryStats.lastQueriesCount int
|
||||
@@ -2861,7 +2972,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period
|
||||
-snapshotsMaxAge value
|
||||
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
|
||||
@@ -37,7 +37,6 @@ func main() {
|
||||
cgroup.SetGOGC(*gogc)
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
logger.Infof("starting VictoriaLogs at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
@@ -49,8 +48,10 @@ func main() {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
|
||||
startTime = time.Now()
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -48,7 +48,6 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
*dryRun = true
|
||||
@@ -74,13 +73,16 @@ func main() {
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
|
||||
startSelfScraper()
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
stopSelfScraper()
|
||||
|
||||
@@ -89,8 +91,8 @@ func main() {
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
vminsert.Stop()
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
vminsert.Stop()
|
||||
|
||||
vmstorage.Stop()
|
||||
vmselect.Stop()
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -54,15 +55,14 @@ var (
|
||||
)
|
||||
|
||||
type test struct {
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
ResultQueryRange QueryRange `json:"result_query_range"`
|
||||
Issue string `json:"issue"`
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
Issue string `json:"issue"`
|
||||
}
|
||||
|
||||
type Metric struct {
|
||||
@@ -80,42 +80,90 @@ type Series struct {
|
||||
Status string `json:"status"`
|
||||
Data []map[string]string `json:"data"`
|
||||
}
|
||||
|
||||
type Query struct {
|
||||
Status string `json:"status"`
|
||||
Data QueryData `json:"data"`
|
||||
}
|
||||
type QueryData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryDataResult `json:"result"`
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result json.RawMessage `json:"result"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type QueryDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Value []interface{} `json:"value"`
|
||||
const rtVector, rtMatrix = "vector", "matrix"
|
||||
|
||||
func (q *Query) metrics() ([]Metric, error) {
|
||||
switch q.Data.ResultType {
|
||||
case rtVector:
|
||||
var r QueryInstant
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
case rtMatrix:
|
||||
var r QueryRange
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown result type %q", q.Data.ResultType)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *QueryDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
type QueryInstant struct {
|
||||
Result []struct {
|
||||
Labels map[string]string `json:"metric"`
|
||||
TV [2]interface{} `json:"value"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
func (q QueryInstant) metrics() ([]Metric, error) {
|
||||
result := make([]Metric, len(q.Result))
|
||||
for i, res := range q.Result {
|
||||
f, err := strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
var m Metric
|
||||
m.Metric = res.Labels
|
||||
m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
|
||||
m.Values = append(m.Values, f)
|
||||
result[i] = m
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type QueryRange struct {
|
||||
Status string `json:"status"`
|
||||
Data QueryRangeData `json:"data"`
|
||||
}
|
||||
type QueryRangeData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryRangeDataResult `json:"result"`
|
||||
Result []struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
type QueryRangeDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
func (q QueryRange) metrics() ([]Metric, error) {
|
||||
var result []Metric
|
||||
for i, res := range q.Result {
|
||||
var m Metric
|
||||
for _, tv := range res.Values {
|
||||
f, err := strconv.ParseFloat(tv[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, tv[1], err)
|
||||
}
|
||||
m.Values = append(m.Values, f)
|
||||
m.Timestamps = append(m.Timestamps, int64(tv[0].(float64)))
|
||||
}
|
||||
if len(m.Values) < 1 || len(m.Timestamps) < 1 {
|
||||
return nil, fmt.Errorf("metric %v contains no values", res)
|
||||
}
|
||||
m.Metric = q.Result[i].Metric
|
||||
result = append(result, m)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (r *QueryRangeDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryRangeDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
func (q *Query) UnmarshalJSON(b []byte) error {
|
||||
type plain Query
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(q))
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
@@ -197,6 +245,9 @@ func TestWriteRead(t *testing.T) {
|
||||
func testWrite(t *testing.T) {
|
||||
t.Run("prometheus", func(t *testing.T) {
|
||||
for _, test := range readIn("prometheus", t, insertionTime) {
|
||||
if test.Data == nil {
|
||||
continue
|
||||
}
|
||||
s := newSuite(t)
|
||||
r := testutil.WriteRequest{}
|
||||
s.noError(json.Unmarshal([]byte(strings.Join(test.Data, "\n")), &r.Timeseries))
|
||||
@@ -272,17 +323,19 @@ func testRead(t *testing.T) {
|
||||
if err := checkSeriesResult(s, test.ResultSeries); err != nil {
|
||||
t.Fatalf("Series. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query_range"):
|
||||
queryResult := QueryRange{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryRangeResult(queryResult, test.ResultQueryRange); err != nil {
|
||||
t.Fatalf("Query Range. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query"):
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
gotMetrics, err := queryResult.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse query response: %s", err)
|
||||
}
|
||||
expMetrics, err := test.ResultQuery.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse expected response: %s", err)
|
||||
}
|
||||
if err := checkMetricsResult(gotMetrics, expMetrics); err != nil {
|
||||
t.Fatalf("%q fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
@@ -417,60 +470,6 @@ func removeIfFoundSeries(r map[string]string, contains []map[string]string) []ma
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryResult(got, want Query) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryData(r QueryDataResult, contains []QueryDataResult) []QueryDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Value[0], item.Value[0]) && reflect.DeepEqual(r.Value[1], item.Value[1]) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryRangeResult(got, want QueryRange) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryRangeDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryRangeData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query range result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryRangeData(r QueryRangeDataResult, contains []QueryRangeDataResult) []QueryRangeDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Values, item.Values) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
type suite struct{ t *testing.T }
|
||||
|
||||
func newSuite(t *testing.T) *suite { return &suite{t: t} }
|
||||
|
||||
@@ -98,7 +98,7 @@ func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
|
||||
dst = append(dst, prompb.Label{})
|
||||
}
|
||||
lb := &dst[len(dst)-1]
|
||||
lb.Name = bytesutil.ToUnsafeBytes(key)
|
||||
lb.Value = bytesutil.ToUnsafeBytes(value)
|
||||
lb.Name = key
|
||||
lb.Value = value
|
||||
return dst
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"not_nan_not_inf;item=y 3 {TIME_S-1m}",
|
||||
"not_nan_not_inf;item=y 1 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query_range?query=1/(not_nan_not_inf-1)!=inf!=nan&start={TIME_S-3m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"empty_label_match;foo=bar 2 {TIME_S-1m}",
|
||||
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S-1m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_set 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_set&start={TIME_S-150s}&end={TIME_S}&step=10s&max_lookback=1s"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_set"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_unset 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_unset&start={TIME_S-150s}&end={TIME_S}&step=10s"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_unset"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"not_nan_as_missing_data;item=y 3 {TIME_S-1m}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=not_nan_as_missing_data>1&start={TIME_S-2m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
12
app/victoria-metrics/testdata/prometheus/instant-matrix.json
vendored
Normal file
12
app/victoria-metrics/testdata/prometheus/instant-matrix.json
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "instant query with look-behind window",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo[5m]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
11
app/victoria-metrics/testdata/prometheus/instant-scalar.json
vendored
Normal file
11
app/victoria-metrics/testdata/prometheus/instant-scalar.json
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"name": "instant scalar query",
|
||||
"query": ["/api/v1/query?query=42&time={TIME_S}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"vector",
|
||||
"result":[{"metric":{},"value":["{TIME_S}", "42"]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
13
app/victoria-metrics/testdata/prometheus/issue-5553-too-big-lookback.json
vendored
Normal file
13
app/victoria-metrics/testdata/prometheus/issue-5553-too-big-lookback.json
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "too big look-behind window",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"},{\"name\":\"issue\",\"value\":\"5553\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo{issue=\"5553\"}[100y]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo", "issue": "5553"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
18
app/victoria-metrics/testdata/prometheus/query-range.json
vendored
Normal file
18
app/victoria-metrics/testdata/prometheus/query-range.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "query range",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"bar\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}, {\"value\":2,\"timestamp\":\"{TIME_MS-120s}\"}, {\"value\":1,\"timestamp\":\"{TIME_MS-180s}\"}]}]"],
|
||||
"query": ["/api/v1/query_range?query=bar&step=30s&start={TIME_MS-180s}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[
|
||||
{
|
||||
"metric":{"__name__":"bar"},
|
||||
"values":[["{TIME_S-180s}", "1"],["{TIME_S-150s}", "1"],["{TIME_S-120s}", "2"],["{TIME_S-90s}", "2"], ["{TIME_S-60s}", "1"], ["{TIME_S-30s}", "1"], ["{TIME_S}", "1"]]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,7 +105,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
if err != nil {
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
@@ -120,10 +120,10 @@ func compressData(s string) string {
|
||||
var bb bytes.Buffer
|
||||
zw := gzip.NewWriter(&bb)
|
||||
if _, err := zw.Write([]byte(s)); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %s", err))
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %w", err))
|
||||
}
|
||||
if err := zw.Close(); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %w", err))
|
||||
}
|
||||
return bb.String()
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
|
||||
r.Reset(dataBytes)
|
||||
_, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -29,7 +29,7 @@ func benchmarkParseJSONRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseJSONRequest(data, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -84,7 +84,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
|
||||
err = req.Unmarshal(bb.B)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse request body: %s", err)
|
||||
return 0, fmt.Errorf("cannot parse request body: %w", err)
|
||||
}
|
||||
|
||||
var commonFields []logstorage.Field
|
||||
@@ -97,7 +97,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
// Labels are same for all entries in the stream.
|
||||
commonFields, err = parsePromLabels(commonFields[:0], stream.Labels)
|
||||
if err != nil {
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %s", stream.Labels, err)
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %w", stream.Labels, err)
|
||||
}
|
||||
fields := commonFields
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ func benchmarkParseProtobufRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseProtobufRequest(body, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"files": {
|
||||
"main.css": "./static/css/main.9a224445.css",
|
||||
"main.js": "./static/js/main.02178f4b.js",
|
||||
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
|
||||
"main.css": "./static/css/main.d1313636.css",
|
||||
"main.js": "./static/js/main.1919fefe.js",
|
||||
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
|
||||
"index.html": "./index.html"
|
||||
},
|
||||
"entrypoints": [
|
||||
"static/css/main.9a224445.css",
|
||||
"static/js/main.02178f4b.js"
|
||||
"static/css/main.d1313636.css",
|
||||
"static/js/main.1919fefe.js"
|
||||
]
|
||||
}
|
||||
@@ -1 +1 @@
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.02178f4b.js"></script><link href="./static/css/main.9a224445.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.1919fefe.js"></script><link href="./static/css/main.d1313636.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
File diff suppressed because one or more lines are too long
1
app/vlselect/vmui/static/css/main.d1313636.css
Normal file
1
app/vlselect/vmui/static/css/main.d1313636.css
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
app/vlselect/vmui/static/js/main.1919fefe.js
Normal file
2
app/vlselect/vmui/static/js/main.1919fefe.js
Normal file
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
|
||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
||||
|
||||
/**
|
||||
* @remix-run/router v1.7.2
|
||||
* @remix-run/router v1.10.0
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router DOM v6.14.2
|
||||
* React Router DOM v6.17.0
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -29,7 +29,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router v6.14.2
|
||||
* React Router v6.17.0
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -1,11 +1,11 @@
|
||||
---
|
||||
sort: 14
|
||||
weight: 14
|
||||
sort: 23
|
||||
weight: 23
|
||||
title: MetricsQL
|
||||
menu:
|
||||
docs:
|
||||
parent: "victoriametrics"
|
||||
weight: 14
|
||||
parent: 'victoriametrics'
|
||||
weight: 23
|
||||
aliases:
|
||||
- /ExtendedPromQL.html
|
||||
- /MetricsQL.html
|
||||
@@ -21,7 +21,8 @@ However, there are some [intentional differences](https://medium.com/@romanhavro
|
||||
|
||||
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
|
||||
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
|
||||
and introduction into [basic querying via MetricsQL](https://docs.victoriametrics.com/keyConcepts.html#metricsql).
|
||||
|
||||
The following functionality is implemented differently in MetricsQL compared to PromQL. This improves user experience:
|
||||
|
||||
@@ -109,7 +110,7 @@ The list of MetricsQL features on top of PromQL:
|
||||
* [histogram_quantile](#histogram_quantile) accepts optional third arg - `boundsLabel`.
|
||||
In this case it returns `lower` and `upper` bounds for the estimated percentile.
|
||||
See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`.
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`. See also [drop_empty_series](#drop_empty_series).
|
||||
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
|
||||
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
|
||||
* `WITH` templates. This feature simplifies writing and managing complex queries.
|
||||
@@ -531,7 +532,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
|
||||
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
|
||||
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
||||
|
||||
See also [mad](#mad) and [range_mad](#range_mad).
|
||||
See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
|
||||
#### max_over_time
|
||||
|
||||
@@ -561,6 +562,18 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
|
||||
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
|
||||
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.
|
||||
|
||||
#### outlier_iqr_over_time
|
||||
|
||||
`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
|
||||
if its value is either smaller than the `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.
|
||||
|
||||
The `outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
|
||||
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last 24 hours.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr).
|
||||
|
||||
#### predict_linear
|
||||
|
||||
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
|
||||
@@ -865,7 +878,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
|
||||
|
||||
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
|
||||
See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
|
||||
|
||||
### Transform functions
|
||||
@@ -1055,6 +1068,17 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
|
||||
This function is supported by PromQL. See also [rad](#rad).
|
||||
|
||||
#### drop_empty_series
|
||||
|
||||
`drop_empty_series(q)` is a [transform function](#transform-functions), which drops empty series from `q`.
|
||||
|
||||
This function can be used when `default` operator should be applied only to non-empty series. For example,
|
||||
`drop_empty_series(temperature < 30) default 42` returns series, which have at least a single sample smaller than 30 on the selected time range,
|
||||
while filling gaps in the returned series with 42.
|
||||
|
||||
On the other hand `(temperature < 30) default 40` returns all the `temperature` series, even if they have no samples smaller than 30,
|
||||
by replacing all the values bigger or equal to 30 with 40.
|
||||
|
||||
#### end
|
||||
|
||||
`end()` is a [transform function](#transform-functions), which returns the unix timestamp in seconds for the last point.
|
||||
@@ -1591,7 +1615,7 @@ which maps `label` values from `src_*` to `dst*` for all the time series returne
|
||||
which drops time series from `q` with `label` not matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_mismatch](#label_mismatch).
|
||||
See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
|
||||
|
||||
#### label_mismatch
|
||||
|
||||
@@ -1599,7 +1623,7 @@ See also [label_mismatch](#label_mismatch).
|
||||
which drops time series from `q` with `label` matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_match](#label_match).
|
||||
See also [label_match](#label_match) and [labels_equal](#labels_equal).
|
||||
|
||||
#### label_move
|
||||
|
||||
@@ -1642,23 +1666,30 @@ for the given `label` for every time series returned by `q`.
|
||||
For example, if `label_value(foo, "bar")` is applied to `foo{bar="1.234"}`, then it will return a time series
|
||||
`foo{bar="1.234"}` with `1.234` value. Function will return no data for non-numeric label values.
|
||||
|
||||
#### labels_equal
|
||||
|
||||
`labels_equal(q, "label1", "label2", ...)` is [label manipulation function](#label-manipulation-functions), which returns `q` series with identical values for the listed labels
|
||||
"label1", "label2", etc.
|
||||
|
||||
See also [label_match](#label_match) and [label_mismatch](#label_mismatch).
|
||||
|
||||
#### sort_by_label
|
||||
|
||||
`sort_by_label(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
`sort_by_label(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label_desc](#sort_by_label_desc) and [sort_by_label_numeric](#sort_by_label_numeric).
|
||||
|
||||
#### sort_by_label_desc
|
||||
|
||||
`sort_by_label_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
`sort_by_label_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label](#sort_by_label) and [sort_by_label_numeric_desc](#sort_by_label_numeric_desc).
|
||||
|
||||
#### sort_by_label_numeric
|
||||
|
||||
`sort_by_label_numeric(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
`sort_by_label_numeric(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series
|
||||
in the following order of `bar` label values: `1`, `2`, `15` and `101`.
|
||||
@@ -1667,7 +1698,7 @@ See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_
|
||||
|
||||
#### sort_by_label_numeric_desc
|
||||
|
||||
`sort_by_label_numeric_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
`sort_by_label_numeric_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")`
|
||||
would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
|
||||
@@ -1839,20 +1870,33 @@ This function is supported by PromQL.
|
||||
`mode(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
|
||||
#### outliers_iqr
|
||||
|
||||
`outliers_iqr(q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
|
||||
outside e.g. [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
|
||||
comparing to other time series at the given point, where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.
|
||||
|
||||
The `outliers_iqr()` is useful for detecting anomalous series in the group of series. For example, `outliers_iqr(temperature) by (country)` returns
|
||||
per-country series with anomalous outlier values comparing to the rest of per-country series.
|
||||
|
||||
See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
|
||||
#### outliers_mad
|
||||
|
||||
`outliers_mad(tolerance, q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least
|
||||
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
|
||||
E.g. it returns time series with at least a single point below `median(q) - mad(q)` or a single point above `median(q) + mad(q)`.
|
||||
|
||||
See also [outliersk](#outliersk) and [mad](#mad).
|
||||
See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).
|
||||
|
||||
#### outliersk
|
||||
|
||||
`outliersk(k, q)` is [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
|
||||
out of time series returned by `q`.
|
||||
|
||||
See also [outliers_mad](#outliers_mad).
|
||||
See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
|
||||
|
||||
#### quantile
|
||||
|
||||
@@ -1972,7 +2016,7 @@ See also [bottomk_min](#bottomk_min).
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
This function is useful for detecting anomalies in the group of related time series.
|
||||
|
||||
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
|
||||
See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).
|
||||
|
||||
## Subqueries
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -65,7 +65,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package datadog
|
||||
package datadogv1
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
@@ -8,33 +8,32 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv1"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv1"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv1"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []datadog.Series) error {
|
||||
return stream.Parse(req.Body, ce, func(series []datadogv1.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmarshal.Label) error {
|
||||
func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -63,7 +62,7 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadog.SplitTag(tag)
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
@@ -88,7 +87,9 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
102
app/vmagent/datadogv2/request_handler.go
Normal file
102
app/vmagent/datadogv2/request_handler.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package datadogv2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv2"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv2"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv2"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v2/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ct := req.Header.Get("Content-Type")
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, ct, func(series []datadogv2.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv2.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
for _, rs := range ss.Resources {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: rs.Type,
|
||||
Value: rs.Name,
|
||||
})
|
||||
}
|
||||
if ss.SourceTypeName != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "source_type_name",
|
||||
Value: ss.SourceTypeName,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Timestamp: pt.Timestamp * 1000,
|
||||
Value: pt.Value,
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
@@ -20,10 +21,12 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, insertRows)
|
||||
return stream.Parse(r, false, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -56,7 +59,9 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -36,9 +36,9 @@ var (
|
||||
// InsertHandlerForReader processes remote write for influx line protocol.
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
|
||||
return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
return insertRows(at, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -130,7 +130,9 @@ func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prom
|
||||
ctx.ctx.Labels = labels
|
||||
ctx.ctx.Samples = samples
|
||||
ctx.commonLabels = commonLabels
|
||||
remotewrite.Push(at, &ctx.ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -11,10 +11,9 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
|
||||
@@ -43,12 +42,13 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -97,7 +97,6 @@ func main() {
|
||||
remotewrite.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
@@ -126,7 +125,7 @@ func main() {
|
||||
common.StartUnmarshalWorkers()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
return influx.InsertHandlerForReader(nil, r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
@@ -141,15 +140,17 @@ func main() {
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.Push)
|
||||
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
}
|
||||
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
if len(*httpListenAddr) > 0 {
|
||||
@@ -346,9 +347,20 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v1/series":
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -572,9 +584,19 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v1/series":
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -632,8 +654,11 @@ var (
|
||||
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
|
||||
|
||||
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogv2WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
datadogv2WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
|
||||
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
|
||||
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
|
||||
|
||||
@@ -84,6 +84,8 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
|
||||
@@ -59,7 +59,9 @@ func insertRows(at *auth.Token, tss []prompbmarshal.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -56,7 +56,9 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -64,7 +64,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -73,7 +73,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
@@ -48,8 +47,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
for i := range ts.Labels {
|
||||
label := &ts.Labels[i]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(label.Name),
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
@@ -69,7 +68,9 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -58,8 +58,10 @@ var (
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
|
||||
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
|
||||
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
|
||||
@@ -106,12 +108,15 @@ type client struct {
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg, err := authCfg.NewTLSConfig()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize tls config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg := authCfg.NewTLSConfig()
|
||||
awsCfg, err := getAWSAPIConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
DialContext: statDial,
|
||||
@@ -231,10 +236,16 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
@@ -302,7 +313,7 @@ func (c *client) runWorker() {
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
@@ -311,11 +322,11 @@ func (c *client) runWorker() {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -328,15 +339,25 @@ func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil && errors.Is(err, io.EOF) {
|
||||
// it is likely connection became stale.
|
||||
// So we do one more attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, _ = c.newRequest(url, body)
|
||||
resp, err = c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
}
|
||||
return resp, err
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
@@ -362,8 +383,7 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash := awsapi.HashHex(body)
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
|
||||
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
|
||||
}
|
||||
}
|
||||
return req, nil
|
||||
|
||||
@@ -37,9 +37,9 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.fq = fq
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
@@ -57,10 +57,11 @@ func (ps *pendingSeries) MustStop() {
|
||||
ps.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
func (ps *pendingSeries) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
ps.mu.Lock()
|
||||
ps.wr.push(tss)
|
||||
ok := ps.wr.tryPush(tss)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
@@ -70,18 +71,20 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
}
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
defer ticker.Stop()
|
||||
mustStop := false
|
||||
for !mustStop {
|
||||
for {
|
||||
select {
|
||||
case <-ps.stopCh:
|
||||
mustStop = true
|
||||
ps.mu.Lock()
|
||||
ps.wr.mustFlushOnStop()
|
||||
ps.mu.Unlock()
|
||||
return
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ps.mu.Lock()
|
||||
ps.wr.flush()
|
||||
_ = ps.wr.tryFlush()
|
||||
ps.mu.Unlock()
|
||||
}
|
||||
}
|
||||
@@ -90,16 +93,16 @@ type writeRequest struct {
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
// How many significant figures must be left before sending the writeRequest to fq.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
// How many decimal digits after point must be left before sending the writeRequest to fq.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
@@ -112,7 +115,7 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
@@ -130,23 +133,40 @@ func (wr *writeRequest) reset() {
|
||||
wr.buf = wr.buf[:0]
|
||||
}
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
// mustFlushOnStop force pushes wr data into wr.fq
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (wr *writeRequest) mustFlushOnStop() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite) {
|
||||
logger.Panicf("BUG: final flush must always return true")
|
||||
}
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
|
||||
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryFlush() bool {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite) {
|
||||
return false
|
||||
}
|
||||
wr.reset()
|
||||
return true
|
||||
}
|
||||
|
||||
func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roundDigits int) {
|
||||
if n := significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
if n := roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
@@ -154,21 +174,27 @@ func (wr *writeRequest) adjustSampleValues() {
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
func (wr *writeRequest) tryPush(src []prompbmarshal.TimeSeries) bool {
|
||||
tssDst := wr.tss
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
|
||||
wr.tss = tssDst
|
||||
wr.flush()
|
||||
if !wr.tryFlush() {
|
||||
return false
|
||||
}
|
||||
tssDst = wr.tss
|
||||
}
|
||||
tsSrc := &src[i]
|
||||
adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
|
||||
}
|
||||
|
||||
wr.tss = tssDst
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
@@ -196,13 +222,13 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
|
||||
func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
return true
|
||||
}
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
bb.B = wr.MarshalProtobuf(bb.B[:0])
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := snappyBufPool.Get()
|
||||
if isVMRemoteWrite {
|
||||
@@ -212,11 +238,13 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
pushBlock(zb.B)
|
||||
if !tryPushBlock(zb.B) {
|
||||
return false
|
||||
}
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockSizeBytes.Update(float64(len(zb.B)))
|
||||
snappyBufPool.Put(zb)
|
||||
return
|
||||
return true
|
||||
}
|
||||
snappyBufPool.Put(zb)
|
||||
} else {
|
||||
@@ -229,23 +257,36 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
samples := wr.Timeseries[0].Samples
|
||||
if len(samples) == 1 {
|
||||
logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
|
||||
return
|
||||
return true
|
||||
}
|
||||
n := len(samples) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return
|
||||
return true
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries[n:]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
|
||||
@@ -26,13 +26,16 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
pushBlock := func(block []byte) bool {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
return true
|
||||
}
|
||||
if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
|
||||
t.Fatalf("cannot push data to to remote storage")
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
@@ -40,7 +43,7 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 0)
|
||||
f(false, expectedBlockLenProm, 3)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/klauspost/compress/s2"
|
||||
)
|
||||
@@ -22,7 +21,7 @@ func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []by
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(rowsCount))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
@@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
rctx.reset()
|
||||
tssDst := tss[:0]
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
@@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
if len(extraLabels) == 0 {
|
||||
return
|
||||
}
|
||||
rctx.reset()
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
@@ -139,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
|
||||
rctx.reset()
|
||||
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
|
||||
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
for _, label := range ts.Labels {
|
||||
labelName := label.Name
|
||||
if labelName == "vm_account_id" || labelName == "vm_project_id" {
|
||||
continue
|
||||
}
|
||||
labels = append(labels, label)
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_account_id",
|
||||
Value: accountIDStr,
|
||||
})
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_project_id",
|
||||
Value: projectIDStr,
|
||||
})
|
||||
ts.Labels = labels[labelsLen:]
|
||||
}
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
type relabelCtx struct {
|
||||
// pool for labels, which are used during the relabeling.
|
||||
labels []prompbmarshal.Label
|
||||
@@ -160,7 +191,7 @@ func getRelabelCtx() *relabelCtx {
|
||||
}
|
||||
|
||||
func putRelabelCtx(rctx *relabelCtx) {
|
||||
rctx.labels = rctx.labels[:0]
|
||||
rctx.reset()
|
||||
relabelCtxPool.Put(rctx)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,14 +3,16 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -24,6 +26,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -34,17 +37,22 @@ var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
|
||||
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
|
||||
"See also -remoteWrite.multitenantURL")
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
|
||||
"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
|
||||
enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
|
||||
"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
|
||||
"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
|
||||
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
|
||||
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
shardByURLLabels = flag.String("remoteWrite.shardByURL.labels", "", "Comma-separated list of label names for sharding across all the -remoteWrite.url. All labels of timeseries are used by default. "+
|
||||
"See also -remoteWrite.shardByURL and https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
@@ -83,6 +91,11 @@ var (
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
|
||||
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
|
||||
disableOnDiskQueue = flag.Bool("remoteWrite.disableOnDiskQueue", false, "Whether to disable storing pending data to -remoteWrite.tmpDataPath "+
|
||||
"when the configured remote storage systems cannot keep up with the data ingestion rate. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence ."+
|
||||
"See also -remoteWrite.dropSamplesOnOverload")
|
||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||
"cannot be pushed into the configured remote storage systems in a timely manner. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -96,12 +109,18 @@ var (
|
||||
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
|
||||
defaultAuthToken = &auth.Token{}
|
||||
|
||||
shardLabelsFilter map[string]struct{}
|
||||
// ErrQueueFullHTTPRetry must be returned when TryPush() returns false.
|
||||
ErrQueueFullHTTPRetry = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("remote storage systems cannot keep up with the data ingestion rate; retry the request later " +
|
||||
"or remove -remoteWrite.disableOnDiskQueue from vmagent command-line flags, so it could save pending data to -remoteWrite.tmpDataPath; " +
|
||||
"see https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence"),
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
}
|
||||
)
|
||||
|
||||
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
|
||||
// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
|
||||
func MultitenancyEnabled() bool {
|
||||
return len(*remoteWriteMultitenantURLs) > 0
|
||||
return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
|
||||
}
|
||||
|
||||
// Contains the current relabelConfigs.
|
||||
@@ -121,6 +140,8 @@ func InitSecretFlags() {
|
||||
}
|
||||
}
|
||||
|
||||
var shardByURLLabelsMap map[string]struct{}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
@@ -157,6 +178,13 @@ func Init() {
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
if len(*shardByURLLabels) > 0 {
|
||||
m := make(map[string]struct{}, len(*shardByURLLabels))
|
||||
for _, label := range *shardByURLLabels {
|
||||
m[label] = struct{}{}
|
||||
}
|
||||
shardByURLLabelsMap = m
|
||||
}
|
||||
initLabelsGlobal()
|
||||
|
||||
// Register SIGHUP handler for config reload before loadRelabelConfigs.
|
||||
@@ -175,12 +203,7 @@ func Init() {
|
||||
if len(*remoteWriteURLs) > 0 {
|
||||
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
|
||||
}
|
||||
|
||||
if *shardByURLLabels != "" {
|
||||
for _, label := range strings.Split(*shardByURLLabels, ",") {
|
||||
shardLabelsFilter[strings.TrimSpace(label)] = struct{}{}
|
||||
}
|
||||
}
|
||||
dropDanglingQueues()
|
||||
|
||||
// Start config reloader.
|
||||
configReloaderWG.Add(1)
|
||||
@@ -198,6 +221,42 @@ func Init() {
|
||||
}()
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
if *keepDanglingQueues {
|
||||
return
|
||||
}
|
||||
if len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Do not drop dangling queues for *remoteWriteMultitenantURLs, since it is impossible to determine
|
||||
// unused queues for multitenant urls - they are created on demand when new sample for the given
|
||||
// tenant is pushed to remote storage.
|
||||
return
|
||||
}
|
||||
// Remove dangling persistent queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
//
|
||||
existingQueues := make(map[string]struct{}, len(rwctxsDefault))
|
||||
for _, rwctx := range rwctxsDefault {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsDefault))
|
||||
}
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
relabelConfigReloads.Inc()
|
||||
logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
@@ -217,7 +276,7 @@ func reloadRelabelConfigs() {
|
||||
var (
|
||||
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
relabelConfigSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
relabelConfigSuccess = metrics.NewGauge(`vmagent_relabel_config_last_reload_successful`, nil)
|
||||
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -271,33 +330,6 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
}
|
||||
|
||||
if !*keepDanglingQueues {
|
||||
// Remove dangling queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
existingQueues := make(map[string]struct{}, len(rwctxs))
|
||||
for _, rwctx := range rwctxs {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxs))
|
||||
}
|
||||
}
|
||||
|
||||
return rwctxs
|
||||
}
|
||||
|
||||
@@ -306,7 +338,7 @@ var configReloaderWG sync.WaitGroup
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls Push during and after the call to this func.
|
||||
// It is expected that nobody calls TryPush during and after the call to this func.
|
||||
func Stop() {
|
||||
close(configReloaderStopCh)
|
||||
configReloaderWG.Wait()
|
||||
@@ -316,7 +348,7 @@ func Stop() {
|
||||
}
|
||||
rwctxsDefault = nil
|
||||
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call Push during the Stop call.
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call TryPush during the Stop call.
|
||||
for _, rwctxs := range rwctxsMap {
|
||||
for _, rwctx := range rwctxs {
|
||||
rwctx.MustStop()
|
||||
@@ -332,24 +364,47 @@ func Stop() {
|
||||
}
|
||||
}
|
||||
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
|
||||
// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// Note that wr may be modified by Push because of relabeling and rounding.
|
||||
func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
|
||||
// PushDropSamplesOnFailure can modify wr contents.
|
||||
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
_ = tryPush(at, wr, true)
|
||||
}
|
||||
|
||||
// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// TryPush can modify wr contents, so the caller must re-initialize wr before calling TryPush() after unsuccessful attempt.
|
||||
// TryPush may send partial data from wr on unsuccessful attempt, so repeated call for the same wr may send the data multiple times.
|
||||
//
|
||||
// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
|
||||
func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
||||
return tryPush(at, wr, *dropSamplesOnOverload)
|
||||
}
|
||||
|
||||
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
|
||||
tss := wr.Timeseries
|
||||
|
||||
if at == nil && MultitenancyEnabled() {
|
||||
// Write data to default tenant if at isn't set when multitenancy is enabled.
|
||||
at = defaultAuthToken
|
||||
}
|
||||
|
||||
var tenantRctx *relabelCtx
|
||||
var rwctxs []*remoteWriteCtx
|
||||
if at == nil {
|
||||
rwctxs = rwctxsDefault
|
||||
} else if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
// Convert at to (vm_account_id, vm_project_id) labels.
|
||||
tenantRctx = getRelabelCtx()
|
||||
defer putRelabelCtx(tenantRctx)
|
||||
rwctxs = rwctxsDefault
|
||||
} else {
|
||||
if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
|
||||
}
|
||||
rwctxsMapLock.Lock()
|
||||
tenantID := tenantmetrics.TenantID{
|
||||
AccountID: at.AccountID,
|
||||
@@ -363,18 +418,37 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
rwctxsMapLock.Unlock()
|
||||
}
|
||||
|
||||
rowsCount := getRowsCount(tss)
|
||||
|
||||
if *disableOnDiskQueue {
|
||||
// Quick check whether writes to configured remote storage systems are blocked.
|
||||
// This allows saving CPU time spent on relabeling and block compression
|
||||
// if some of remote storage systems cannot keep up with the data ingestion rate.
|
||||
for _, rwctx := range rwctxs {
|
||||
if rwctx.fq.IsWriteBlocked() {
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
// Just drop samples
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcsGlobal := rcs.global
|
||||
if pcsGlobal.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
defer putRelabelCtx(rctx)
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
rowsCount := getRowsCount(tss)
|
||||
globalRowsPushedBeforeRelabel.Add(rowsCount)
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
samplesCount := 0
|
||||
@@ -382,7 +456,7 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
labelsCount += len(tss[i].Labels)
|
||||
labelsCount += len(tss[i].Samples) * len(tss[i].Labels)
|
||||
i++
|
||||
if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
|
||||
break
|
||||
@@ -395,6 +469,9 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
} else {
|
||||
tss = nil
|
||||
}
|
||||
if tenantRctx != nil {
|
||||
tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
|
||||
}
|
||||
if rctx != nil {
|
||||
rowsCountBeforeRelabel := getRowsCount(tssBlock)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
|
||||
@@ -403,25 +480,35 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
}
|
||||
sortLabelsIfNeeded(tssBlock)
|
||||
tssBlock = limitSeriesCardinality(tssBlock)
|
||||
pushBlockToRemoteStorages(rwctxs, tssBlock)
|
||||
if rctx != nil {
|
||||
rctx.reset()
|
||||
if !tryPushBlockToRemoteStorages(rwctxs, tssBlock) {
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
|
||||
var (
|
||||
samplesDropped = metrics.NewCounter(`vmagent_remotewrite_samples_dropped_total`)
|
||||
pushFailures = metrics.NewCounter(`vmagent_remotewrite_push_failures_total`)
|
||||
)
|
||||
|
||||
func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) bool {
|
||||
if len(tssBlock) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
if len(rwctxs) == 1 {
|
||||
// Fast path - just push data to the configured single remote storage
|
||||
rwctxs[0].Push(tssBlock)
|
||||
return
|
||||
return rwctxs[0].TryPush(tssBlock)
|
||||
}
|
||||
|
||||
// We need to push tssBlock to multiple remote storages.
|
||||
@@ -429,15 +516,28 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
if *shardByURL {
|
||||
// Shard the data among rwctxs
|
||||
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
|
||||
tmpLabels := promutils.GetLabels()
|
||||
for _, ts := range tssBlock {
|
||||
h := getLabelsHash(ts.Labels, shardLabelsFilter)
|
||||
hashLabels := ts.Labels
|
||||
if len(shardByURLLabelsMap) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLLabelsMap[label.Name]; ok {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
h := getLabelsHash(hashLabels)
|
||||
idx := h % uint64(len(tssByURL))
|
||||
tssByURL[idx] = append(tssByURL[idx], ts)
|
||||
}
|
||||
promutils.PutLabels(tmpLabels)
|
||||
|
||||
// Push sharded data to remote storages in parallel in order to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for i, rwctx := range rwctxs {
|
||||
tssShard := tssByURL[i]
|
||||
if len(tssShard) == 0 {
|
||||
@@ -445,11 +545,13 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
}
|
||||
go func(rwctx *remoteWriteCtx, tss []prompbmarshal.TimeSeries) {
|
||||
defer wg.Done()
|
||||
rwctx.Push(tss)
|
||||
if !rwctx.TryPush(tss) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
}(rwctx, tssShard)
|
||||
}
|
||||
wg.Wait()
|
||||
return
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
}
|
||||
|
||||
// Replicate data among rwctxs.
|
||||
@@ -457,13 +559,17 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for _, rwctx := range rwctxs {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
rwctx.Push(tssBlock)
|
||||
if !rwctx.TryPush(tssBlock) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
}
|
||||
|
||||
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
|
||||
@@ -483,7 +589,7 @@ func limitSeriesCardinality(tss []prompbmarshal.TimeSeries) []prompbmarshal.Time
|
||||
dst := make([]prompbmarshal.TimeSeries, 0, len(tss))
|
||||
for i := range tss {
|
||||
labels := tss[i].Labels
|
||||
h := getLabelsHash(labels, nil)
|
||||
h := getLabelsHash(labels)
|
||||
if hourlySeriesLimiter != nil && !hourlySeriesLimiter.Add(h) {
|
||||
hourlySeriesLimitRowsDropped.Add(len(tss[i].Samples))
|
||||
logSkippedSeries(labels, "-remoteWrite.maxHourlySeries", hourlySeriesLimiter.MaxItems())
|
||||
@@ -507,16 +613,10 @@ var (
|
||||
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
|
||||
)
|
||||
|
||||
func getLabelsHash(labels []prompbmarshal.Label, filterLabels map[string]struct{}) uint64 {
|
||||
func getLabelsHash(labels []prompbmarshal.Label) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
for _, label := range labels {
|
||||
if len(filterLabels) > 0 {
|
||||
_, ok := filterLabels[label.Name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, label.Value...)
|
||||
}
|
||||
@@ -589,13 +689,19 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, *disableOnDiskQueue)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
if fq.IsWriteBlocked() {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
@@ -617,7 +723,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
|
||||
pss[i] = newPendingSeries(fq, c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
@@ -634,7 +740,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
|
||||
}
|
||||
@@ -670,7 +776,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
@@ -708,7 +814,9 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
rwctx.pushInternal(tss)
|
||||
|
||||
// Try pushing the data to remote storage
|
||||
ok := rwctx.tryPushInternal(tss)
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
@@ -716,6 +824,8 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
@@ -735,7 +845,21 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
|
||||
return dst
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompbmarshal.TimeSeries) {
|
||||
if rwctx.tryPushInternal(tss) {
|
||||
return
|
||||
}
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushInternal must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if *dropSamplesOnOverload {
|
||||
rowsCount := getRowsCount(tss)
|
||||
samplesDropped.Add(rowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) bool {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
if len(labelsGlobal) > 0 {
|
||||
@@ -749,13 +873,16 @@ func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
|
||||
ok := pss[idx].TryPush(tss)
|
||||
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
@@ -768,7 +895,7 @@ func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
|
||||
@@ -820,6 +947,7 @@ func CheckStreamAggrConfigs() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAggregators returns aggregators for all the configured remote writes.
|
||||
func GetAggregators() map[string]*streamaggr.Aggregators {
|
||||
var result = map[string]*streamaggr.Aggregators{}
|
||||
|
||||
|
||||
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -1,246 +1,3 @@
|
||||
See vmalert-tool docs [here](https://docs.victoriametrics.com/vmalert-tool.html).
|
||||
|
||||
# vmalert-tool
|
||||
|
||||
VMAlert command-line tool
|
||||
|
||||
## Unit testing for rules
|
||||
|
||||
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
|
||||
It will perform the following actions:
|
||||
* sets up an isolated VictoriaMetrics instance;
|
||||
* simulates the periodic ingestion of time series;
|
||||
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
|
||||
* checks whether the firing alerts or resulting recording rules match the expected results.
|
||||
|
||||
See how to run vmalert-tool for unit test below:
|
||||
|
||||
```
|
||||
# Run vmalert-tool with one or multiple test files via --files cmd-line flag
|
||||
./vmalert-tool unittest --files test1.yaml --files test2.yaml
|
||||
```
|
||||
|
||||
vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
|
||||
except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
|
||||
validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
|
||||
which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||
|
||||
### Test file format
|
||||
|
||||
The configuration format for files specified in `--files` cmd-line flag is the following:
|
||||
|
||||
```yaml
|
||||
# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
|
||||
# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
|
||||
rule_files:
|
||||
[ - <string> ]
|
||||
|
||||
# The evaluation interval for rules specified in `rule_files`
|
||||
[ evaluation_interval: <duration> | default = 1m ]
|
||||
|
||||
# Groups listed below will be evaluated by order.
|
||||
# Not All the groups need not be mentioned, if not, they will be evaluated by define order in rule_files.
|
||||
group_eval_order:
|
||||
[ - <string> ]
|
||||
|
||||
# The list of unit test files to be checked during evaluation.
|
||||
tests:
|
||||
[ - <test_group> ]
|
||||
```
|
||||
|
||||
#### `<test_group>`
|
||||
|
||||
```yaml
|
||||
# Interval between samples for input series
|
||||
interval: <duration>
|
||||
# Time series to persist into the database according to configured <interval> before running tests.
|
||||
input_series:
|
||||
[ - <series> ]
|
||||
|
||||
# Name of the test group, optional
|
||||
[ name: <string> ]
|
||||
|
||||
# Unit tests for alerting rules
|
||||
alert_rule_test:
|
||||
[ - <alert_test_case> ]
|
||||
|
||||
# Unit tests for Metricsql expressions.
|
||||
metricsql_expr_test:
|
||||
[ - <metricsql_expr_test> ]
|
||||
|
||||
# External labels accessible for templating.
|
||||
external_labels:
|
||||
[ <labelname>: <string> ... ]
|
||||
|
||||
```
|
||||
|
||||
#### `<series>`
|
||||
|
||||
```yaml
|
||||
# series in the following format '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
series: <string>
|
||||
|
||||
# values support several special equations:
|
||||
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
|
||||
# Read this as series starts at a, then c further samples incrementing by b.
|
||||
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
|
||||
# Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
|
||||
# '_' represents a missing sample from scrape
|
||||
# 'stale' indicates a stale sample
|
||||
# Examples:
|
||||
# 1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
|
||||
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
|
||||
# 3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
|
||||
# 4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
|
||||
values: <string>
|
||||
```
|
||||
|
||||
#### `<alert_test_case>`
|
||||
|
||||
vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
|
||||
So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
|
||||
but no need to add them under `exp_alerts`.
|
||||
You can also pass `--disableAlertgroupLabel` to skip `alertgroup` check.
|
||||
|
||||
```yaml
|
||||
# The time elapsed from time=0s when this alerting rule should be checked.
|
||||
# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
|
||||
eval_time: <duration>
|
||||
|
||||
# Name of the group name to be tested.
|
||||
groupname: <string>
|
||||
|
||||
# Name of the alert to be tested.
|
||||
alertname: <string>
|
||||
|
||||
# List of the expected alerts that are firing under the given alertname at
|
||||
# the given evaluation time. If you want to test if an alerting rule should
|
||||
# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
|
||||
exp_alerts:
|
||||
[ - <alert> ]
|
||||
```
|
||||
|
||||
#### `<alert>`
|
||||
|
||||
```yaml
|
||||
# These are the expanded labels and annotations of the expected alert.
|
||||
# Note: labels also include the labels of the sample associated with the alert
|
||||
exp_labels:
|
||||
[ <labelname>: <string> ]
|
||||
exp_annotations:
|
||||
[ <labelname>: <string> ]
|
||||
```
|
||||
|
||||
#### `<metricsql_expr_test>`
|
||||
|
||||
```yaml
|
||||
# Expression to evaluate
|
||||
expr: <string>
|
||||
|
||||
# The time elapsed from time=0s when this expression be evaluated.
|
||||
eval_time: <duration>
|
||||
|
||||
# Expected samples at the given evaluation time.
|
||||
exp_samples:
|
||||
[ - <sample> ]
|
||||
```
|
||||
|
||||
#### `<sample>`
|
||||
|
||||
```yaml
|
||||
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
labels: <string>
|
||||
|
||||
# The expected value of the Metricsql expression.
|
||||
value: <number>
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This is an example input file for unit testing which will pass.
|
||||
`test.yaml` is the test file which follows the syntax above and `alerts.yaml` contains the alerting rules.
|
||||
|
||||
With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
|
||||
|
||||
#### `test.yaml`
|
||||
|
||||
```yaml
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'up{job="prometheus", instance="localhost:9090"}'
|
||||
values: "0+0x1440"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: suquery_interval_test
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
|
||||
value: 1
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 2h
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: prometheus
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
datacenter: dc-123
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
datacenter: dc-123
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
external_labels:
|
||||
datacenter: dc-123
|
||||
```
|
||||
|
||||
#### `alerts.yaml`
|
||||
|
||||
```yaml
|
||||
# This is the rules file.
|
||||
|
||||
groups:
|
||||
- name: group1
|
||||
rules:
|
||||
- alert: InstanceDown
|
||||
expr: up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Instance {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||
- alert: AlwaysFiring
|
||||
expr: 1
|
||||
|
||||
- name: group2
|
||||
rules:
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
||||
- record: suquery_interval_test
|
||||
expr: count_over_time(up[5m:])
|
||||
```
|
||||
vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmalert-tool.md).
|
||||
|
||||
12
app/vmalert-tool/multiarch/Dockerfile
Normal file
12
app/vmalert-tool/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmalert-tool-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmalert-tool-linux-${TARGETARCH}-prod ./vmalert-tool-prod
|
||||
@@ -21,6 +21,11 @@ tests:
|
||||
groupname: group2
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts: []
|
||||
- eval_time: 150s
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts:
|
||||
- {}
|
||||
- eval_time: 6m
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
|
||||
@@ -336,7 +336,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if disableAlertgroupLabel {
|
||||
g.Name = ""
|
||||
}
|
||||
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name]; !ok {
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name]; !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := gotAlertsMap[g.Name]; !ok {
|
||||
@@ -347,7 +347,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if !isAlertRule {
|
||||
continue
|
||||
}
|
||||
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name][ar.Name]; ok {
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name][ar.Name]; ok {
|
||||
for _, got := range ar.GetAlerts() {
|
||||
if got.State != notifier.StateFiring {
|
||||
continue
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,11 +19,14 @@ import (
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutils.Duration `yaml:"eval_delay,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
@@ -233,7 +236,7 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
}
|
||||
return parse(files, validateTplFn, validateExpressions)
|
||||
}
|
||||
@@ -242,11 +245,11 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
}
|
||||
groups, err := parse(files, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
|
||||
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
|
||||
@@ -106,7 +106,7 @@ func TestParseBad(t *testing.T) {
|
||||
},
|
||||
{
|
||||
[]string{"http://unreachable-url"},
|
||||
"failed to read",
|
||||
"failed to",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
|
||||
@@ -49,7 +49,7 @@ func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
path, resp.StatusCode, http.StatusOK, data)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ groups:
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
eval_delay: 30s
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
|
||||
@@ -37,13 +37,17 @@ var (
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", `Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used`)
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", `Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used`)
|
||||
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url. ")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url.")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url. ")
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url")
|
||||
oauth2EndpointParams = flag.String("datasource.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -datasource.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side `+
|
||||
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
|
||||
`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
@@ -83,7 +87,10 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
if !*queryTimeAlignment {
|
||||
logger.Warnf("flag `datasource.queryTimeAlignment` is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead")
|
||||
logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
|
||||
}
|
||||
if *lookBack != 0 {
|
||||
logger.Warnf("flag `-datasource.lookback` will be deprecated soon. Please use `-rule.evalDelay` command-line flag instead. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
|
||||
}
|
||||
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
@@ -103,17 +110,21 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
extraParams.Set("round_digits", fmt.Sprintf("%d", *roundDigits))
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
}
|
||||
_, err = authCfg.GetAuthHeader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %s", *addr, err)
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
|
||||
}
|
||||
|
||||
return &VMStorage{
|
||||
|
||||
@@ -142,24 +142,30 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Resu
|
||||
return Result{}, nil, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, _ = s.newQueryRequest(query, ts)
|
||||
resp, err = s.do(ctx, req)
|
||||
}
|
||||
if err != nil {
|
||||
return Result{}, req, err
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return Result{}, nil, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRequest(query, ts)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
// Process the received response.
|
||||
parseFn := parsePrometheusResponse
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
parseFn = parseGraphiteResponse
|
||||
}
|
||||
result, err := parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return result, req, err
|
||||
}
|
||||
|
||||
@@ -177,23 +183,31 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
|
||||
return res, fmt.Errorf("end param is missing")
|
||||
}
|
||||
req, err := s.newQueryRangeRequest(query, start, end)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, _ = s.newQueryRangeRequest(query, start, end)
|
||||
resp, err = s.do(ctx, req)
|
||||
}
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
return parsePrometheusResponse(req, resp)
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return res, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRangeRequest(query, start, end)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
res, err = parsePrometheusResponse(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
|
||||
@@ -219,7 +233,7 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %s", s.datasourceURL, err)
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
|
||||
}
|
||||
s.setPrometheusRangeReqParams(req, query, start, end)
|
||||
return req, nil
|
||||
@@ -228,7 +242,7 @@ func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*h
|
||||
func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %s", s.datasourceURL, err)
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
|
||||
}
|
||||
switch s.dataSourceType {
|
||||
case "", datasourcePrometheus:
|
||||
|
||||
@@ -112,14 +112,14 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||
}
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
var pi promInstant
|
||||
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
|
||||
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
|
||||
@@ -47,8 +47,8 @@ all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
`)
|
||||
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions `+
|
||||
`for rules annotations templating. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
@@ -59,7 +59,7 @@ absolute path to all .tpl files in root.
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -96,7 +96,6 @@ func main() {
|
||||
notifier.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
logger.Warnf("flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases.")
|
||||
@@ -118,9 +117,9 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
eu, err := getExternalURL(*externalURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init `external.url`: %s", err)
|
||||
logger.Fatalf("failed to init `-external.url`: %s", err)
|
||||
}
|
||||
|
||||
alertURLGeneratorFn, err = getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
@@ -182,8 +181,11 @@ func main() {
|
||||
rh := &requestHandler{m: manager}
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, rh.handler)
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
@@ -194,7 +196,7 @@ func main() {
|
||||
var (
|
||||
configReloads = metrics.NewCounter(`vmalert_config_last_reload_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmalert_config_last_reload_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmalert_config_last_reload_successful`)
|
||||
configSuccess = metrics.NewGauge(`vmalert_config_last_reload_successful`, nil)
|
||||
configTimestamp = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -230,7 +232,9 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
|
||||
}
|
||||
manager.rw = rw
|
||||
if rw != nil {
|
||||
manager.rw = rw
|
||||
}
|
||||
|
||||
rr, err := remoteread.Init()
|
||||
if err != nil {
|
||||
@@ -241,14 +245,26 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
return manager, nil
|
||||
}
|
||||
|
||||
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
if externalURL != "" {
|
||||
return url.Parse(externalURL)
|
||||
func getExternalURL(customURL string) (*url.URL, error) {
|
||||
if customURL == "" {
|
||||
// use local hostname as external URL
|
||||
return getHostnameAsExternalURL(*httpListenAddr, httpserver.IsTLS())
|
||||
}
|
||||
hname, err := os.Hostname()
|
||||
u, err := url.Parse(customURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return nil, fmt.Errorf("invalid scheme %q in url %q, only 'http' and 'https' are supported", u.Scheme, u.String())
|
||||
}
|
||||
return u, nil
|
||||
}
|
||||
|
||||
func getHostnameAsExternalURL(httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
hname, err := os.Hostname()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get hostname: %w", err)
|
||||
}
|
||||
port := ""
|
||||
if ipport := strings.Split(httpListenAddr, ":"); len(ipport) > 1 {
|
||||
port = ":" + ipport[1]
|
||||
|
||||
@@ -22,22 +22,29 @@ func init() {
|
||||
}
|
||||
|
||||
func TestGetExternalURL(t *testing.T) {
|
||||
expURL := "https://vicotriametrics.com/path"
|
||||
u, err := getExternalURL(expURL, "", false)
|
||||
invalidURL := "victoriametrics.com/path"
|
||||
_, err := getExternalURL(invalidURL)
|
||||
if err == nil {
|
||||
t.Errorf("expected error, got nil")
|
||||
}
|
||||
|
||||
expURL := "https://victoriametrics.com/path"
|
||||
u, err := getExternalURL(expURL)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if u.String() != expURL {
|
||||
t.Errorf("unexpected url want %s, got %s", expURL, u.String())
|
||||
t.Errorf("unexpected url: want %q, got %s", expURL, u.String())
|
||||
}
|
||||
|
||||
h, _ := os.Hostname()
|
||||
expURL = fmt.Sprintf("https://%s:4242", h)
|
||||
u, err = getExternalURL("", "0.0.0.0:4242", true)
|
||||
expURL = fmt.Sprintf("http://%s:8880", h)
|
||||
u, err = getExternalURL("")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if u.String() != expURL {
|
||||
t.Errorf("unexpected url want %s, got %s", expURL, u.String())
|
||||
t.Errorf("unexpected url: want %s, got %s", expURL, u.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +141,7 @@ groups:
|
||||
t.Fatalf("expected to have config error %s; got nil instead", cErr)
|
||||
}
|
||||
if cfgSuc != 0 {
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 0; got %d instead", cfgSuc)
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 0; got %v instead", cfgSuc)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -143,7 +150,7 @@ groups:
|
||||
t.Fatalf("unexpected config error: %s", cErr)
|
||||
}
|
||||
if cfgSuc != 1 {
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 1; got %d instead", cfgSuc)
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 1; got %v instead", cfgSuc)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -191,7 +191,7 @@ func (a Alert) toPromLabels(relabelCfg *promrelabel.ParsedConfigs) []prompbmarsh
|
||||
var labels []prompbmarshal.Label
|
||||
for k, v := range a.Labels {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: k,
|
||||
Name: promrelabel.SanitizeMetricName(k),
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -237,6 +237,11 @@ func TestAlert_toPromLabels(t *testing.T) {
|
||||
[]prompbmarshal.Label{{Name: "a", Value: "baz"}, {Name: "foo", Value: "bar"}},
|
||||
nil,
|
||||
)
|
||||
fn(
|
||||
map[string]string{"foo.bar": "baz", "service!name": "qux"},
|
||||
[]prompbmarshal.Label{{Name: "foo_bar", Value: "baz"}, {Name: "service_name", Value: "qux"}},
|
||||
nil,
|
||||
)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
|
||||
@@ -144,7 +144,7 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
|
||||
aCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(ba.Username, ba.Password.String(), ba.PasswordFile),
|
||||
utils.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile),
|
||||
utils.WithOAuth(oauth.ClientID, oauth.ClientSecretFile, oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";")))
|
||||
utils.WithOAuth(oauth.ClientID, oauth.ClientSecretFile, oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package notifier
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"gopkg.in/yaml.v2"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
@@ -11,6 +10,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
@@ -142,26 +143,23 @@ func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (stri
|
||||
if labels.Len() == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
schemeRelabeled := labels.Get("__scheme__")
|
||||
if len(schemeRelabeled) == 0 {
|
||||
schemeRelabeled = "http"
|
||||
scheme := labels.Get("__scheme__")
|
||||
if len(scheme) == 0 {
|
||||
scheme = "http"
|
||||
}
|
||||
addressRelabeled := labels.Get("__address__")
|
||||
if len(addressRelabeled) == 0 {
|
||||
alertsPath := labels.Get("__alerts_path__")
|
||||
if !strings.HasPrefix(alertsPath, "/") {
|
||||
alertsPath = "/" + alertsPath
|
||||
}
|
||||
address := labels.Get("__address__")
|
||||
if len(address) == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
if strings.Contains(addressRelabeled, "/") {
|
||||
return "", nil, nil
|
||||
}
|
||||
addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
|
||||
alertsPathRelabeled := labels.Get("__alerts_path__")
|
||||
if !strings.HasPrefix(alertsPathRelabeled, "/") {
|
||||
alertsPathRelabeled = "/" + alertsPathRelabeled
|
||||
}
|
||||
u := fmt.Sprintf("%s://%s%s", schemeRelabeled, addressRelabeled, alertsPathRelabeled)
|
||||
address = addMissingPort(scheme, address)
|
||||
u := fmt.Sprintf("%s://%s%s", scheme, address, alertsPath)
|
||||
if _, err := url.Parse(u); err != nil {
|
||||
return "", nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q, metrics_path=%q (%q): %w",
|
||||
u, cfg.Scheme, schemeRelabeled, target, addressRelabeled, alertsPathRelabeled, err)
|
||||
u, cfg.Scheme, scheme, target, address, alertsPath, err)
|
||||
}
|
||||
return u, labels, nil
|
||||
}
|
||||
@@ -181,9 +179,24 @@ func addMissingPort(scheme, target string) string {
|
||||
func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
||||
m := promutils.NewLabels(3 + metaLabels.Len())
|
||||
m.Add("__address__", target)
|
||||
m.Add("__scheme__", cfg.Scheme)
|
||||
m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
|
||||
address := target
|
||||
scheme := cfg.Scheme
|
||||
alertsPath := path.Join("/", cfg.PathPrefix, alertManagerPath)
|
||||
// try to extract optional scheme and alertsPath from __address__.
|
||||
if strings.HasPrefix(address, "http://") {
|
||||
scheme = "http"
|
||||
address = address[len("http://"):]
|
||||
} else if strings.HasPrefix(address, "https://") {
|
||||
scheme = "https"
|
||||
address = address[len("https://"):]
|
||||
}
|
||||
if n := strings.IndexByte(address, '/'); n >= 0 {
|
||||
alertsPath = address[n:]
|
||||
address = address[:n]
|
||||
}
|
||||
m.Add("__address__", address)
|
||||
m.Add("__scheme__", scheme)
|
||||
m.Add("__alerts_path__", alertsPath)
|
||||
m.AddFrom(metaLabels)
|
||||
return m
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ func (cw *configWatcher) reload(path string) error {
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %s", typeK, err)
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
|
||||
cw.setTargets(typeK, targets)
|
||||
@@ -107,7 +107,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
}
|
||||
updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %s", typeK, err)
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.setTargets(typeK, updateTargets)
|
||||
}
|
||||
@@ -118,7 +118,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %s", err)}
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
}
|
||||
var targets []Target
|
||||
var errors []error
|
||||
@@ -167,11 +167,11 @@ func (cw *configWatcher) start() error {
|
||||
for _, target := range cfg.Targets {
|
||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse labels for target %q: %s", target, err)
|
||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||
}
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %s", address, err)
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||
}
|
||||
targets = append(targets, Target{
|
||||
Notifier: notifier,
|
||||
@@ -189,14 +189,14 @@ func (cw *configWatcher) start() error {
|
||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %s", err)
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start consulSD discovery: %s", err)
|
||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,14 +207,14 @@ func (cw *configWatcher) start() error {
|
||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %s", err)
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %s", err)
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -318,3 +318,47 @@ func TestMergeHTTPClientConfigs(t *testing.T) {
|
||||
t.Fatalf("expected BasicAuth tp be present")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLabels(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
target string
|
||||
cfg *Config
|
||||
expectedAddress string
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
"invalid address",
|
||||
"invalid:*//url",
|
||||
&Config{},
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"use some default params",
|
||||
"alertmanager:9093",
|
||||
&Config{PathPrefix: "test"},
|
||||
"http://alertmanager:9093/test/api/v2/alerts",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"use target address",
|
||||
"https://alertmanager:9093/api/v1/alerts",
|
||||
&Config{Scheme: "http", PathPrefix: "test"},
|
||||
"https://alertmanager:9093/api/v1/alerts",
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
address, _, err := parseLabels(tc.target, nil, tc.cfg)
|
||||
if err == nil == tc.expectedErr {
|
||||
t.Fatalf("unexpected error; got %t; want %t", err != nil, tc.expectedErr)
|
||||
}
|
||||
if address != tc.expectedAddress {
|
||||
t.Fatalf("unexpected address; got %q; want %q", address, tc.expectedAddress)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ var (
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
blackHole = flag.Bool("notifier.blackhole", false, "Whether to blackhole alerting notifications. "+
|
||||
"Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). "+
|
||||
"`-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.")
|
||||
"-notifier.url, -notifier.config and -notifier.blackhole are mutually exclusive.")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
@@ -46,6 +46,8 @@ var (
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("notifier.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("notifier.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -notifier.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("notifier.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("notifier.oauth2.scopes", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'. "+
|
||||
@@ -90,7 +92,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
externalLabels = extLabels
|
||||
eu, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse external URL: %s", err)
|
||||
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||
}
|
||||
|
||||
templates.UpdateWithFuncs(templates.FuncsWithExternalURL(eu))
|
||||
@@ -116,7 +118,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
if len(*addrs) > 0 {
|
||||
notifiers, err := notifiersFromFlags(gen)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %s", err)
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
}
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
return notifiers
|
||||
@@ -126,7 +128,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
|
||||
cw, err = newWatcher(*configPath, gen)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init config watcher: %s", err)
|
||||
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||
}
|
||||
return cw.notifiers, nil
|
||||
}
|
||||
@@ -141,6 +143,11 @@ func InitSecretFlags() {
|
||||
func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
var notifiers []Notifier
|
||||
for i, addr := range *addrs {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(i)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -notifier.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
authCfg := promauth.HTTPClientConfig{
|
||||
TLSConfig: &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(i),
|
||||
@@ -160,6 +167,7 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
ClientID: oauth2ClientID.GetOptionalArg(i),
|
||||
ClientSecret: promauth.NewSecret(oauth2ClientSecret.GetOptionalArg(i)),
|
||||
ClientSecretFile: oauth2ClientSecretFile.GetOptionalArg(i),
|
||||
EndpointParams: endpointParams,
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(i), ";"),
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(i),
|
||||
},
|
||||
|
||||
@@ -5,6 +5,7 @@ static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
- https://localhost:9093/test/api/v2/alerts
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: bar
|
||||
|
||||
@@ -41,8 +41,10 @@ var (
|
||||
oauth2ClientID = flag.String("remoteRead.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteRead.url.")
|
||||
oauth2ClientSecret = flag.String("remoteRead.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteRead.url.")
|
||||
oauth2ClientSecretFile = flag.String("remoteRead.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteRead.url.")
|
||||
oauth2TokenURL = flag.String("remoteRead.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -remoteRead.url. ")
|
||||
oauth2Scopes = flag.String("remoteRead.oauth2.scopes", "", "Optional OAuth2 scopes to use for -remoteRead.url. Scopes must be delimited by ';'.")
|
||||
oauth2EndpointParams = flag.String("remoteRead.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -remoteRead.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("remoteRead.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -remoteRead.url. ")
|
||||
oauth2Scopes = flag.String("remoteRead.oauth2.scopes", "", "Optional OAuth2 scopes to use for -remoteRead.url. Scopes must be delimited by ';'.")
|
||||
)
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -63,10 +65,14 @@ func Init() (datasource.QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
|
||||
@@ -3,6 +3,7 @@ package remotewrite
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -117,12 +118,17 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
// Push adds timeseries into queue for writing into remote storage.
|
||||
// Push returns and error if client is stopped or if queue is full.
|
||||
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
||||
rwTotal.Inc()
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
return fmt.Errorf("client is closed")
|
||||
case c.input <- s:
|
||||
return nil
|
||||
default:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
||||
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
||||
c.maxQueueSize)
|
||||
@@ -181,11 +187,13 @@ func (c *Client) run(ctx context.Context) {
|
||||
}
|
||||
|
||||
var (
|
||||
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
|
||||
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
||||
@@ -200,15 +208,10 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
defer prompbmarshal.ResetWriteRequest(wr)
|
||||
defer wr.Reset()
|
||||
defer bufferFlushDuration.UpdateDuration(time.Now())
|
||||
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b := snappy.Encode(nil, data)
|
||||
|
||||
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||
@@ -222,6 +225,11 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
L:
|
||||
for attempts := 0; ; attempts++ {
|
||||
err := c.send(ctx, b)
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Something in the middle between client and destination might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
err = c.send(ctx, b)
|
||||
}
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(b))
|
||||
@@ -259,8 +267,12 @@ L:
|
||||
|
||||
}
|
||||
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
rwErrors.Inc()
|
||||
rows := 0
|
||||
for _, ts := range wr.Timeseries {
|
||||
rows += len(ts.Samples)
|
||||
}
|
||||
droppedRows.Add(rows)
|
||||
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||
len(wr.Timeseries))
|
||||
}
|
||||
@@ -282,7 +294,9 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
if c.authCfg != nil {
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return &nonRetriableError{err: err}
|
||||
return &nonRetriableError{
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
}
|
||||
if !*disablePathAppend {
|
||||
@@ -301,13 +315,14 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
// Prometheus remote Write compatible receivers MUST
|
||||
switch resp.StatusCode / 100 {
|
||||
case 2:
|
||||
// respond with a HTTP 2xx status code when the write is successful.
|
||||
// respond with HTTP 2xx status code when write is successful.
|
||||
return nil
|
||||
case 4:
|
||||
if resp.StatusCode != http.StatusTooManyRequests {
|
||||
// MUST NOT retry write requests on HTTP 4xx responses other than 429
|
||||
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL.Redacted(), body)}
|
||||
return &nonRetriableError{
|
||||
err: fmt.Errorf("unexpected response code %d for %s. Response body %q", resp.StatusCode, req.URL.Redacted(), body),
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
|
||||
@@ -140,7 +140,7 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
wr := &prompb.WriteRequest{}
|
||||
if err := wr.Unmarshal(b); err != nil {
|
||||
if err := wr.UnmarshalProtobuf(b); err != nil {
|
||||
rw.err(w, fmt.Errorf("unmarhsal err: %w", err))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -49,10 +49,7 @@ func (c *DebugClient) Push(s prompbmarshal.TimeSeries) error {
|
||||
c.wg.Add(1)
|
||||
defer c.wg.Done()
|
||||
wr := &prompbmarshal.WriteRequest{Timeseries: []prompbmarshal.TimeSeries{s}}
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal the given time series: %w", err)
|
||||
}
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
|
||||
return c.send(data)
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ var (
|
||||
|
||||
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines max number of timeseries to be flushed at once")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote write endpoint")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
@@ -41,11 +41,13 @@ var (
|
||||
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
oauth2ClientID = flag.String("remoteWrite.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteWrite.url.")
|
||||
oauth2ClientSecret = flag.String("remoteWrite.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteWrite.url.")
|
||||
oauth2ClientSecretFile = flag.String("remoteWrite.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteWrite.url.")
|
||||
oauth2TokenURL = flag.String("remoteWrite.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -notifier.url.")
|
||||
oauth2Scopes = flag.String("remoteWrite.oauth2.scopes", "", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.")
|
||||
oauth2ClientID = flag.String("remoteWrite.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteWrite.url")
|
||||
oauth2ClientSecret = flag.String("remoteWrite.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flag.String("remoteWrite.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteWrite.url")
|
||||
oauth2EndpointParams = flag.String("remoteWrite.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("remoteWrite.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -notifier.url.")
|
||||
oauth2Scopes = flag.String("remoteWrite.oauth2.scopes", "", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.")
|
||||
)
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -67,10 +69,14 @@ func Init(ctx context.Context) (*Client, error) {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
|
||||
@@ -36,11 +36,11 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
}
|
||||
tFrom, err := time.Parse(time.RFC3339, *replayFrom)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %q: %s", *replayFrom, err)
|
||||
return fmt.Errorf("failed to parse %q: %w", *replayFrom, err)
|
||||
}
|
||||
tTo, err := time.Parse(time.RFC3339, *replayTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %q: %s", *replayTo, err)
|
||||
return fmt.Errorf("failed to parse %q: %w", *replayTo, err)
|
||||
}
|
||||
if !tTo.After(tFrom) {
|
||||
return fmt.Errorf("replay.timeTo must be bigger than replay.timeFrom")
|
||||
|
||||
@@ -30,6 +30,7 @@ type AlertingRule struct {
|
||||
Annotations map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
File string
|
||||
EvalInterval time.Duration
|
||||
Debug bool
|
||||
|
||||
@@ -47,7 +48,7 @@ type AlertingRule struct {
|
||||
}
|
||||
|
||||
type alertingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
errors *utils.Counter
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
@@ -67,6 +68,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
File: group.File,
|
||||
EvalInterval: group.Interval,
|
||||
Debug: cfg.Debug,
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
@@ -91,7 +93,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
entries: make([]StateEntry, entrySize),
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
@@ -116,14 +118,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.Err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{%s}`, labels))
|
||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
@@ -242,11 +237,30 @@ type labelSet struct {
|
||||
origin map[string]string
|
||||
// processed labels includes origin labels
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, extra labels are preferred.
|
||||
// in case of key conflicts, origin labels are renamed with prefix `exported_` and extra labels are preferred.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5161
|
||||
// used as labels attached to notifier.Alert and ALERTS series written to remote storage.
|
||||
processed map[string]string
|
||||
}
|
||||
|
||||
// add adds a value v with key k to origin and processed label sets.
|
||||
// On k conflicts in processed set, the passed v is preferred.
|
||||
// On k conflicts in origin set, the original value is preferred and copied
|
||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||
func (ls *labelSet) add(k, v string) {
|
||||
ls.processed[k] = v
|
||||
ov, ok := ls.origin[k]
|
||||
if !ok {
|
||||
ls.origin[k] = v
|
||||
return
|
||||
}
|
||||
if ov != v {
|
||||
// copy value only if v and ov are different
|
||||
key := fmt.Sprintf("exported_%s", k)
|
||||
ls.processed[key] = ov
|
||||
}
|
||||
}
|
||||
|
||||
// toLabels converts labels from given Metric
|
||||
// to labelSet which contains original and processed labels.
|
||||
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
@@ -269,50 +283,49 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.processed[k] = v
|
||||
if _, ok := ls.origin[k]; !ok {
|
||||
ls.origin[k] = v
|
||||
}
|
||||
ls.add(k, v)
|
||||
}
|
||||
|
||||
// set additional labels to identify group and rule name
|
||||
if ar.Name != "" {
|
||||
ls.processed[alertNameLabel] = ar.Name
|
||||
if _, ok := ls.origin[alertNameLabel]; !ok {
|
||||
ls.origin[alertNameLabel] = ar.Name
|
||||
}
|
||||
ls.add(alertNameLabel, ar.Name)
|
||||
}
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.processed[alertGroupNameLabel] = ar.GroupName
|
||||
if _, ok := ls.origin[alertGroupNameLabel]; !ok {
|
||||
ls.origin[alertGroupNameLabel] = ar.GroupName
|
||||
}
|
||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// execRange executes alerting rule on the given time range similarly to exec.
|
||||
// It doesn't update internal states of the Rule and meant to be used just
|
||||
// to get time series for backfilling.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||
// When making consecutive calls make sure to respect time linearity for start and end params,
|
||||
// as this function modifies AlertingRule alerts state.
|
||||
// It is not thread safe.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as a result.
|
||||
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result []prompbmarshal.TimeSeries
|
||||
holdAlertState := make(map[uint64]*notifier.Alert)
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
|
||||
}
|
||||
for _, s := range res.Data {
|
||||
ls, err := ar.toLabels(s, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
if ar.For == 0 { // if alert is instant
|
||||
|
||||
// if alert is instant, For: 0
|
||||
if ar.For == 0 {
|
||||
a.State = notifier.StateFiring
|
||||
for i := range s.Values {
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
@@ -324,18 +337,32 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
at := time.Unix(s.Timestamps[i], 0)
|
||||
// try to restore alert's state on the first iteration
|
||||
if at.Equal(start) {
|
||||
if _, ok := ar.alerts[h]; ok {
|
||||
a = ar.alerts[h]
|
||||
prevT = at
|
||||
}
|
||||
}
|
||||
if at.Sub(prevT) > ar.EvalInterval {
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For {
|
||||
a.Start = time.Time{}
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = at
|
||||
}
|
||||
prevT = at
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
|
||||
// save alert's state on last iteration, so it can be used on the next execRange call
|
||||
if at.Equal(end) {
|
||||
holdAlertState[h] = a
|
||||
}
|
||||
}
|
||||
}
|
||||
ar.alerts = holdAlertState
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -360,6 +387,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
if curState.Err != nil {
|
||||
ar.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
@@ -388,13 +418,12 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
for _, m := range res.Data {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.Err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
curState.Err = fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
// duplicate may be caused the removal of `__name__` label
|
||||
curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.Err
|
||||
}
|
||||
@@ -513,7 +542,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
if ls == nil {
|
||||
ls, err = ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
}
|
||||
}
|
||||
a := ¬ifier.Alert{
|
||||
@@ -591,44 +620,41 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, a := range ar.alerts {
|
||||
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
|
||||
if !*disableAlertGroupLabel {
|
||||
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
|
||||
}
|
||||
var labelsFilter string
|
||||
for k, v := range ar.Labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
if len(res.Data) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
return nil
|
||||
}
|
||||
for _, series := range res.Data {
|
||||
series.DelLabel("__name__")
|
||||
labelSet := make(map[string]string, len(series.Labels))
|
||||
for _, v := range series.Labels {
|
||||
labelSet[v.Name] = v.Value
|
||||
}
|
||||
id := hash(labelSet)
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
qMetrics := res.Data
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.ActiveAt = time.Unix(int64(series.Values[0]), 0)
|
||||
a.Restored = true
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package rule
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
@@ -346,15 +348,18 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
expHoldAlertStateAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("empty", 0),
|
||||
[]datasource.Metric{},
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("empty labels", 0),
|
||||
@@ -364,6 +369,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing", 0),
|
||||
@@ -376,6 +382,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
State: notifier.StateFiring,
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing-on-range", 0),
|
||||
@@ -387,6 +394,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Second),
|
||||
@@ -398,6 +406,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-pending",
|
||||
Labels: map[string]string{"alertname": "for-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-firing", 3*time.Second),
|
||||
@@ -409,6 +427,38 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-firing",
|
||||
Labels: map[string]string{"alertname": "for-firing"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-hold-pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-hold-pending",
|
||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
||||
@@ -422,9 +472,10 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
|
||||
newTestAlertingRule("multi-series", 3*time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
{
|
||||
@@ -436,7 +487,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
//
|
||||
{
|
||||
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
@@ -450,6 +500,29 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{"alertname": "multi-series"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series", "foo": "bar"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
|
||||
@@ -477,16 +550,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
"source": "vm",
|
||||
}},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.Add(tc.data...)
|
||||
gotTS, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
|
||||
gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
@@ -512,6 +585,11 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
|
||||
}
|
||||
}
|
||||
if tc.expHoldAlertStateAlerts != nil {
|
||||
if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
|
||||
t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -564,6 +642,9 @@ func TestGroup_Restore(t *testing.T) {
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
if got.Name != exp.Name {
|
||||
t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -579,6 +660,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
@@ -592,6 +674,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -599,7 +682,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
@@ -607,9 +690,11 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -627,9 +712,11 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -642,6 +729,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
@@ -654,6 +742,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -666,6 +755,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
@@ -678,14 +768,16 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
ar.q = fq
|
||||
|
||||
// successful attempt
|
||||
// label `job` will be overridden by rule extra label, the original value will be reserved by "exported_job"
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err := ar.exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// label `job` will collide with rule extra label and will make both time series equal
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
// label `__name__` will be omitted and get duplicated results here
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo_1", "job", "bar"))
|
||||
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||
if !errors.Is(err, errDuplicate) {
|
||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||
@@ -809,20 +901,22 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "foo"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "bar"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "bar"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "bar",
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
@@ -851,14 +945,18 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
@@ -990,6 +1088,9 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
metrics: &alertingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{alertname=%q}`, name)),
|
||||
},
|
||||
}
|
||||
return &rule
|
||||
}
|
||||
@@ -999,3 +1100,54 @@ func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.
|
||||
rule.KeepFiringFor = keepFiringFor
|
||||
return rule
|
||||
}
|
||||
|
||||
func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
metric := datasource.Metric{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "instance", Value: "0.0.0.0:8800"},
|
||||
{Name: "group", Value: "vmalert"},
|
||||
{Name: "alertname", Value: "ConfigurationReloadFailure"},
|
||||
},
|
||||
Values: []float64{1},
|
||||
Timestamps: []int64{time.Now().UnixNano()},
|
||||
}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Labels: map[string]string{
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
},
|
||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||
Name: "AlertingRulesError",
|
||||
GroupName: "vmalert",
|
||||
}
|
||||
|
||||
expectedOriginLabels := map[string]string{
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
expectedProcessedLabels := map[string]string{
|
||||
"instance": "override",
|
||||
"exported_instance": "0.0.0.0:8800",
|
||||
"alertname": "AlertingRulesError",
|
||||
"exported_alertname": "ConfigurationReloadFailure",
|
||||
"group": "vmalert",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
ls, err := ar.toLabels(metric, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||
t.Errorf("origin labels mismatch, got: %v, want: %v", ls.origin, expectedOriginLabels)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.processed, expectedProcessedLabels) {
|
||||
t.Errorf("processed labels mismatch, got: %v, want: %v", ls.processed, expectedProcessedLabels)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,9 @@ var (
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent ")
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the `time` parameter for rule evaluation requests to compensate intentional data delay from the datasource."+
|
||||
"Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect).")
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
@@ -39,13 +41,16 @@ var (
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
type Group struct {
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
// EvalDelay will adjust timestamp for rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *time.Duration
|
||||
Limit int
|
||||
Concurrency int
|
||||
Checksum string
|
||||
@@ -139,6 +144,9 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
if cfg.EvalOffset != nil {
|
||||
g.EvalOffset = &cfg.EvalOffset.D
|
||||
}
|
||||
if cfg.EvalDelay != nil {
|
||||
g.EvalDelay = &cfg.EvalDelay.D
|
||||
}
|
||||
for _, h := range cfg.Headers {
|
||||
g.Headers[h.Key] = h.Value
|
||||
}
|
||||
@@ -331,7 +339,7 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
@@ -520,7 +528,7 @@ func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
if len(g.Rules) < 1 {
|
||||
return nil
|
||||
@@ -581,10 +589,14 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
// to 10:30, to the previous evaluationInterval.
|
||||
return ts.Add(-g.Interval)
|
||||
}
|
||||
// EvalOffset shouldn't interfere with evalAlignment,
|
||||
// so we return it immediately
|
||||
// when `eval_offset` is using, ts shouldn't be effect by `eval_alignment` and `eval_delay`
|
||||
// since it should be always aligned.
|
||||
return ts
|
||||
}
|
||||
|
||||
timestamp = timestamp.Add(-g.getEvalDelay())
|
||||
|
||||
// always apply the alignment as a last step
|
||||
if g.evalAlignment == nil || *g.evalAlignment {
|
||||
// align query time with interval to get similar result with grafana when plotting time series.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
@@ -594,6 +606,13 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
return timestamp
|
||||
}
|
||||
|
||||
func (g *Group) getEvalDelay() time.Duration {
|
||||
if g.EvalDelay != nil {
|
||||
return *g.EvalDelay
|
||||
}
|
||||
return *evalDelay
|
||||
}
|
||||
|
||||
// executor contains group's notify and rw configs
|
||||
type executor struct {
|
||||
Notifiers func() []notifier.Notifier
|
||||
@@ -602,11 +621,11 @@ type executor struct {
|
||||
Rw remotewrite.RWClient
|
||||
|
||||
previouslySentSeriesToRWMu sync.Mutex
|
||||
// PreviouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
// previouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
// map[ruleID]map[ruleLabels][]prompb.Label
|
||||
// where `ruleID` is ID of the Rule within a Group
|
||||
// and `ruleLabels` is []prompb.Label marshalled to a string
|
||||
PreviouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||
}
|
||||
|
||||
// execConcurrently executes rules concurrently if concurrency>1
|
||||
@@ -644,9 +663,6 @@ var (
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
)
|
||||
|
||||
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
||||
@@ -667,9 +683,7 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
||||
var lastErr error
|
||||
for _, ts := range tss {
|
||||
remoteWriteTotal.Inc()
|
||||
if err := e.Rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
|
||||
}
|
||||
}
|
||||
@@ -723,7 +737,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
|
||||
var staleS []prompbmarshal.TimeSeries
|
||||
// check whether there are series which disappeared and need to be marked as stale
|
||||
e.previouslySentSeriesToRWMu.Lock()
|
||||
for key, labels := range e.PreviouslySentSeriesToRW[rID] {
|
||||
for key, labels := range e.previouslySentSeriesToRW[rID] {
|
||||
if _, ok := ruleLabels[key]; ok {
|
||||
continue
|
||||
}
|
||||
@@ -732,7 +746,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
|
||||
staleS = append(staleS, ss)
|
||||
}
|
||||
// set previous series to current
|
||||
e.PreviouslySentSeriesToRW[rID] = ruleLabels
|
||||
e.previouslySentSeriesToRW[rID] = ruleLabels
|
||||
e.previouslySentSeriesToRWMu.Unlock()
|
||||
|
||||
return staleS
|
||||
@@ -750,14 +764,14 @@ func (e *executor) purgeStaleSeries(activeRules []Rule) {
|
||||
|
||||
for _, rule := range activeRules {
|
||||
id := rule.ID()
|
||||
prev, ok := e.PreviouslySentSeriesToRW[id]
|
||||
prev, ok := e.previouslySentSeriesToRW[id]
|
||||
if ok {
|
||||
// keep previous series for staleness detection
|
||||
newPreviouslySentSeriesToRW[id] = prev
|
||||
}
|
||||
}
|
||||
e.PreviouslySentSeriesToRW = nil
|
||||
e.PreviouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
||||
e.previouslySentSeriesToRW = nil
|
||||
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
||||
|
||||
e.previouslySentSeriesToRWMu.Unlock()
|
||||
}
|
||||
|
||||
@@ -321,7 +321,7 @@ func TestResolveDuration(t *testing.T) {
|
||||
func TestGetStaleSeries(t *testing.T) {
|
||||
ts := time.Now()
|
||||
e := &executor{
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
|
||||
t.Helper()
|
||||
@@ -414,7 +414,7 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||
f := func(curRules, newRules, expStaleRules []Rule) {
|
||||
t.Helper()
|
||||
e := &executor{
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
// seed executor with series for
|
||||
// current rules
|
||||
@@ -424,13 +424,13 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||
|
||||
e.purgeStaleSeries(newRules)
|
||||
|
||||
if len(e.PreviouslySentSeriesToRW) != len(expStaleRules) {
|
||||
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
|
||||
t.Fatalf("expected to get %d stale series, got %d",
|
||||
len(expStaleRules), len(e.PreviouslySentSeriesToRW))
|
||||
len(expStaleRules), len(e.previouslySentSeriesToRW))
|
||||
}
|
||||
|
||||
for _, exp := range expStaleRules {
|
||||
if _, ok := e.PreviouslySentSeriesToRW[exp.ID()]; !ok {
|
||||
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
|
||||
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
|
||||
}
|
||||
}
|
||||
@@ -515,7 +515,7 @@ func TestFaultyRW(t *testing.T) {
|
||||
|
||||
e := &executor{
|
||||
Rw: &remotewrite.Client{},
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
err := e.exec(context.Background(), r, time.Now(), 0, 10)
|
||||
@@ -628,6 +628,7 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
offset := 30 * time.Minute
|
||||
evalDelay := 1 * time.Minute
|
||||
disableAlign := false
|
||||
testCases := []struct {
|
||||
name string
|
||||
@@ -635,7 +636,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
originTS, expTS string
|
||||
}{
|
||||
{
|
||||
"with query align",
|
||||
"with query align + default evalDelay",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
},
|
||||
@@ -643,16 +644,16 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"without query align",
|
||||
"without query align + default evalDelay",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:10:30+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset, find previous offset point",
|
||||
"with eval_offset, find previous offset point + default evalDelay",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
@@ -661,7 +662,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T10:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset",
|
||||
"with eval_offset + default evalDelay",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
@@ -669,14 +670,44 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"1h interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"1m interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Minute,
|
||||
},
|
||||
"2023-08-28T11:41:13+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
{
|
||||
"disable alignment with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,12 +17,14 @@ import (
|
||||
// to evaluate configured Expression and
|
||||
// return TimeSeries as result.
|
||||
type RecordingRule struct {
|
||||
Type config.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
Labels map[string]string
|
||||
GroupID uint64
|
||||
Type config.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
Labels map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
File string
|
||||
|
||||
q datasource.Querier
|
||||
|
||||
@@ -34,7 +36,7 @@ type RecordingRule struct {
|
||||
}
|
||||
|
||||
type recordingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
errors *utils.Counter
|
||||
samples *utils.Gauge
|
||||
}
|
||||
|
||||
@@ -52,13 +54,15 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
// NewRecordingRule creates a new RecordingRule
|
||||
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||
rr := &RecordingRule{
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
File: group.File,
|
||||
metrics: &recordingRuleMetrics{},
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
@@ -78,15 +82,8 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
entries: make([]StateEntry, entrySize),
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
if e.Err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_recording_rules_errors_total{%s}`, labels))
|
||||
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
@@ -138,6 +135,9 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
|
||||
defer func() {
|
||||
rr.state.add(curState)
|
||||
if curState.Err != nil {
|
||||
rr.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
if err != nil {
|
||||
@@ -194,6 +194,9 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
|
||||
labels["__name__"] = rr.Name
|
||||
// override existing labels with configured ones
|
||||
for k, v := range rr.Labels {
|
||||
if _, ok := labels[k]; ok && labels[k] != v {
|
||||
labels[fmt.Sprintf("exported_%s", k)] = labels[k]
|
||||
}
|
||||
labels[k] = v
|
||||
}
|
||||
return newTimeSeries(m.Values, m.Timestamps, labels)
|
||||
@@ -203,7 +206,7 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
|
||||
func (rr *RecordingRule) updateWith(r Rule) error {
|
||||
nr, ok := r.(*RecordingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
|
||||
return fmt.Errorf("BUG: attempt to update recording rule with wrong type %#v", r)
|
||||
}
|
||||
rr.Expr = nr.Expr
|
||||
rr.Labels = nr.Labels
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
@@ -60,7 +61,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
|
||||
},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
@@ -69,9 +70,10 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
"source": "test",
|
||||
}),
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": "job:foo",
|
||||
"job": "bar",
|
||||
"source": "test",
|
||||
"__name__": "job:foo",
|
||||
"job": "bar",
|
||||
"source": "test",
|
||||
"exported_source": "origin",
|
||||
}),
|
||||
},
|
||||
},
|
||||
@@ -201,9 +203,15 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||
}
|
||||
rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
}}
|
||||
rule := &RecordingRule{Name: "job:foo",
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
},
|
||||
metrics: &recordingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
|
||||
},
|
||||
}
|
||||
var err error
|
||||
for _, testCase := range testCases {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
@@ -223,6 +231,9 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
"job": "test",
|
||||
},
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
metrics: &recordingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
|
||||
},
|
||||
}
|
||||
fq := &datasource.FakeQuerier{}
|
||||
expErr := "connection reset by peer"
|
||||
@@ -244,10 +255,7 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
fq.Add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
|
||||
|
||||
_, err = rr.exec(context.TODO(), time.Now(), 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get err; got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), errDuplicate.Error()) {
|
||||
t.Fatalf("expected to get err %q; got %q insterad", errDuplicate, err)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,26 +43,26 @@ type ruleState struct {
|
||||
// StateEntry stores rule's execution states
|
||||
type StateEntry struct {
|
||||
// stores last moment of time rule.Exec was called
|
||||
Time time.Time
|
||||
Time time.Time `json:"time"`
|
||||
// stores the timesteamp with which rule.Exec was called
|
||||
At time.Time
|
||||
At time.Time `json:"at"`
|
||||
// stores the duration of the last rule.Exec call
|
||||
Duration time.Duration
|
||||
Duration time.Duration `json:"duration"`
|
||||
// stores last error that happened in Exec func
|
||||
// resets on every successful Exec
|
||||
// may be used as Health ruleState
|
||||
Err error
|
||||
Err error `json:"error"`
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
Samples int
|
||||
Samples int `json:"samples"`
|
||||
// stores the number of time series fetched during
|
||||
// the last evaluation.
|
||||
// Is supported by VictoriaMetrics only, starting from v1.90.0
|
||||
// If seriesFetched == nil, then this attribute was missing in
|
||||
// datasource response (unsupported).
|
||||
SeriesFetched *int
|
||||
SeriesFetched *int `json:"series_fetched"`
|
||||
// stores the curl command reflecting the HTTP request used during rule.Exec
|
||||
Curl string
|
||||
Curl string `json:"curl"`
|
||||
}
|
||||
|
||||
// GetLastEntry returns latest stateEntry of rule
|
||||
@@ -166,7 +166,7 @@ func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRul
|
||||
var n int
|
||||
for _, ts := range tss {
|
||||
if err := rw.Push(ts); err != nil {
|
||||
return n, fmt.Errorf("remote write failure: %s", err)
|
||||
return n, fmt.Errorf("remote write failure: %w", err)
|
||||
}
|
||||
n += len(ts.Samples)
|
||||
}
|
||||
|
||||
@@ -45,13 +45,14 @@ func WithBearer(token, tokenFile string) AuthConfigOptions {
|
||||
}
|
||||
|
||||
// WithOAuth returns AuthConfigOptions and set OAuth params based on given params
|
||||
func WithOAuth(clientID, clientSecret, clientSecretFile, tokenURL, scopes string) AuthConfigOptions {
|
||||
func WithOAuth(clientID, clientSecret, clientSecretFile, tokenURL, scopes string, endpointParams map[string]string) AuthConfigOptions {
|
||||
return func(config *promauth.HTTPClientConfig) {
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
config.OAuth2 = &promauth.OAuth2Config{
|
||||
ClientID: clientID,
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: tokenURL,
|
||||
Scopes: strings.Split(scopes, ";"),
|
||||
}
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 73 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 151 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 122 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 80 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 62 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 109 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user