Compare commits
425 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fcc8b14f86 | ||
|
|
26488726a8 | ||
|
|
a090de492c | ||
|
|
6939c53e48 | ||
|
|
c12bdd6c28 | ||
|
|
81b5db04f6 | ||
|
|
300d701df0 | ||
|
|
f768d5d797 | ||
|
|
17f8ed8948 | ||
|
|
ea2752ce62 | ||
|
|
32e60fe09d | ||
|
|
adf585f7ed | ||
|
|
bc7cf4950b | ||
|
|
a20c289228 | ||
|
|
c2373a8109 | ||
|
|
7007c6a760 | ||
|
|
583b6fe1e7 | ||
|
|
431aa16c8d | ||
|
|
e7844f2efd | ||
|
|
b2434ec340 | ||
|
|
5d66ee88bd | ||
|
|
b9b18b5fd8 | ||
|
|
b4aef0c141 | ||
|
|
b5978ed8f9 | ||
|
|
24eb1ad0c8 | ||
|
|
98b805544e | ||
|
|
c23e8bee89 | ||
|
|
9b555a0034 | ||
|
|
6c6c2c185f | ||
|
|
c20d68e28d | ||
|
|
491287ed15 | ||
|
|
4a9f8f4cb0 | ||
|
|
0ed291102d | ||
|
|
64780f4f02 | ||
|
|
1a6c3370bf | ||
|
|
b9dcaaa7f8 | ||
|
|
6ee1bfeb3c | ||
|
|
aaa526e8ff | ||
|
|
df59ac7f0e | ||
|
|
a7b11eff7c | ||
|
|
3bce55be0c | ||
|
|
bb7a419cc3 | ||
|
|
97937d58c4 | ||
|
|
3e0a117ddf | ||
|
|
e84c877503 | ||
|
|
7de19c3748 | ||
|
|
5a8daa725e | ||
|
|
2655c02d5e | ||
|
|
8e03bc6b53 | ||
|
|
4ac7e3a355 | ||
|
|
32c064a401 | ||
|
|
60fc2da6c1 | ||
|
|
25165656bb | ||
|
|
41e99765cc | ||
|
|
bc033a2b30 | ||
|
|
105cb44884 | ||
|
|
9ded04e643 | ||
|
|
fae801edd3 | ||
|
|
2582b1e15a | ||
|
|
b11f4ef5ea | ||
|
|
a95246d885 | ||
|
|
d71218d6ce | ||
|
|
e29fe0933b | ||
|
|
3c0aa14b5b | ||
|
|
56310ffb47 | ||
|
|
495fa9800a | ||
|
|
d5682858c0 | ||
|
|
c3a585cfe5 | ||
|
|
806c07ddd5 | ||
|
|
18df07e824 | ||
|
|
ac5b740750 | ||
|
|
ef12598ad4 | ||
|
|
4d961c70f7 | ||
|
|
f888a019fe | ||
|
|
fa566c68a6 | ||
|
|
5543c04061 | ||
|
|
8fb8b71295 | ||
|
|
1c58c00618 | ||
|
|
43ecd5d258 | ||
|
|
ae643ef1f1 | ||
|
|
05c9a4d7ce | ||
|
|
6c214397ed | ||
|
|
4d78954158 | ||
|
|
6d84b1beef | ||
|
|
41456d9569 | ||
|
|
1f1768d7af | ||
|
|
fac7c30f4e | ||
|
|
89e3c70ccd | ||
|
|
1c5163ae51 | ||
|
|
2adb38a9c4 | ||
|
|
15a15e5b99 | ||
|
|
114822d585 | ||
|
|
bf4742526d | ||
|
|
38231d5994 | ||
|
|
eb6def0695 | ||
|
|
633e6b48ad | ||
|
|
980338861f | ||
|
|
bc7d19c8ca | ||
|
|
9240bc36a3 | ||
|
|
e0399ec29a | ||
|
|
72a838a2a1 | ||
|
|
5dd37ad836 | ||
|
|
7345567c29 | ||
|
|
678234e9f0 | ||
|
|
508c608062 | ||
|
|
ffaf48b99e | ||
|
|
b606521745 | ||
|
|
3449d563bd | ||
|
|
9b4294e53e | ||
|
|
8b8d0e3677 | ||
|
|
b25ef138ce | ||
|
|
0e5e502b3c | ||
|
|
38b2a5bc44 | ||
|
|
1075fcfc8c | ||
|
|
da556cc329 | ||
|
|
df197723ae | ||
|
|
d3ee3e0ef5 | ||
|
|
9c0863babc | ||
|
|
1c7f990fad | ||
|
|
3f7ed7e6b2 | ||
|
|
4e3242b02d | ||
|
|
1f105dde98 | ||
|
|
7e68722686 | ||
|
|
0038102b98 | ||
|
|
0b2ea1a7c7 | ||
|
|
3d83f3347d | ||
|
|
4eb9926125 | ||
|
|
12f2c5679b | ||
|
|
90768aa418 | ||
|
|
b3598ba2c1 | ||
|
|
3ea1294ad2 | ||
|
|
7fba73ce11 | ||
|
|
fad212c39c | ||
|
|
c9f39fd51f | ||
|
|
8ab0ce3ded | ||
|
|
74448a7e57 | ||
|
|
873483a782 | ||
|
|
cfec258803 | ||
|
|
6a2a8cd426 | ||
|
|
dfa43da1a2 | ||
|
|
1af5faa4af | ||
|
|
5e17636994 | ||
|
|
c425ec3088 | ||
|
|
ec85d32e21 | ||
|
|
7e374c227f | ||
|
|
69ae1d30bf | ||
|
|
0a5ffb3bc1 | ||
|
|
f89d16fc4c | ||
|
|
ff33e60a3d | ||
|
|
dab160cd74 | ||
|
|
a3b3ea4d73 | ||
|
|
9a353ee695 | ||
|
|
0c06934a59 | ||
|
|
5b419cfb2b | ||
|
|
71681fd1ca | ||
|
|
cf03e11d89 | ||
|
|
5bdf62de5b | ||
|
|
3c3450fc53 | ||
|
|
befcd93305 | ||
|
|
cc6819869a | ||
|
|
8040bdc1d6 | ||
|
|
3a1ef3184d | ||
|
|
f3c5687a04 | ||
|
|
1683df11f0 | ||
|
|
f2f0468ae7 | ||
|
|
ecce2d6db1 | ||
|
|
0f39c0e897 | ||
|
|
41932db848 | ||
|
|
c830064c2f | ||
|
|
b0287867fe | ||
|
|
a9fd130980 | ||
|
|
30d77393a5 | ||
|
|
4e4d7f4cbe | ||
|
|
19c04549a5 | ||
|
|
db6495560c | ||
|
|
4073bb3303 | ||
|
|
d365157381 | ||
|
|
846d5a3ab8 | ||
|
|
481471b872 | ||
|
|
a74f6d63e0 | ||
|
|
6eae3f6c8a | ||
|
|
9d886a2eb0 | ||
|
|
b49b8fed3c | ||
|
|
3ac44baebe | ||
|
|
d0e4190969 | ||
|
|
388d020b7c | ||
|
|
ce4f26db02 | ||
|
|
190a6565ae | ||
|
|
7fc2bd0412 | ||
|
|
bfa73ebdf3 | ||
|
|
51cdf3676b | ||
|
|
74219a1727 | ||
|
|
041a1966c5 | ||
|
|
191e322879 | ||
|
|
544da241e8 | ||
|
|
0c78b891b0 | ||
|
|
f51b7fda8e | ||
|
|
4b42c8abbb | ||
|
|
e14e3d9c8c | ||
|
|
5106045048 | ||
|
|
be509b3995 | ||
|
|
9fd20202e1 | ||
|
|
03a97dc678 | ||
|
|
4b8088e377 | ||
|
|
70cd09e736 | ||
|
|
d2c94a0663 | ||
|
|
a47127c1a6 | ||
|
|
c005245741 | ||
|
|
f2229c2e42 | ||
|
|
f405384c8c | ||
|
|
dd25049858 | ||
|
|
0597718435 | ||
|
|
828aca82e9 | ||
|
|
e5a767cff8 | ||
|
|
eae585e8de | ||
|
|
d374595e31 | ||
|
|
91ccea236f | ||
|
|
fe2d9f6646 | ||
|
|
b79d4cc988 | ||
|
|
e34f77aed4 | ||
|
|
bbea02f82b | ||
|
|
fdefc8a816 | ||
|
|
095d982976 | ||
|
|
105c6b2eb7 | ||
|
|
33df9bee22 | ||
|
|
463455665b | ||
|
|
eb08f5c7e5 | ||
|
|
1aa39efec1 | ||
|
|
07e5d6f0fb | ||
|
|
f75874f5df | ||
|
|
aecfabe318 | ||
|
|
47307c7a37 | ||
|
|
d0ca448093 | ||
|
|
35dd6e5e8e | ||
|
|
52692d001a | ||
|
|
95edeffbc6 | ||
|
|
910a39ad72 | ||
|
|
1f477aba41 | ||
|
|
be20501376 | ||
|
|
43d7de4afe | ||
|
|
34b69dcf58 | ||
|
|
8ba483eca3 | ||
|
|
7575f5c501 | ||
|
|
b4ba8d0d76 | ||
|
|
9678235eea | ||
|
|
fb90a56de2 | ||
|
|
8c1dcf4743 | ||
|
|
01f9edda64 | ||
|
|
160cc9debd | ||
|
|
67160d08a2 | ||
|
|
5ebd5a0d7b | ||
|
|
7a31f8a6c9 | ||
|
|
7cfde237ec | ||
|
|
5a88bc973f | ||
|
|
a35e52114b | ||
|
|
df012f1553 | ||
|
|
326a77c697 | ||
|
|
bc3feebf69 | ||
|
|
11e2d41c77 | ||
|
|
afaf7f0b74 | ||
|
|
4b529562ce | ||
|
|
873f0deaa6 | ||
|
|
0379a0eb82 | ||
|
|
5ddccbc2b9 | ||
|
|
779bbc2e91 | ||
|
|
664fa5cb78 | ||
|
|
317834f876 | ||
|
|
9253c24dd6 | ||
|
|
cd277e3f84 | ||
|
|
0a6a2e455d | ||
|
|
5896fb129d | ||
|
|
0b0f565c31 | ||
|
|
72dbd24b22 | ||
|
|
df88baef07 | ||
|
|
66c76a4d4d | ||
|
|
2afb068f0f | ||
|
|
68be182075 | ||
|
|
6f15ca4a16 | ||
|
|
463a6e9ac6 | ||
|
|
304fe05650 | ||
|
|
7df8d19831 | ||
|
|
1a5cdb4790 | ||
|
|
0f91f83639 | ||
|
|
242472086b | ||
|
|
6af732b6f7 | ||
|
|
e0fc5ef140 | ||
|
|
1e02efd511 | ||
|
|
39c405ed4d | ||
|
|
27668c9d01 | ||
|
|
e13dc04fbf | ||
|
|
8fb68152e6 | ||
|
|
75196d7234 | ||
|
|
51df2248f0 | ||
|
|
635da5fab7 | ||
|
|
ce8ae450fc | ||
|
|
6d03779870 | ||
|
|
a5bc9d93cc | ||
|
|
3d3b0e31e0 | ||
|
|
b1fed78e0b | ||
|
|
c7504daa7a | ||
|
|
042267541f | ||
|
|
b05e1512d4 | ||
|
|
1065deccf8 | ||
|
|
8efe694160 | ||
|
|
74b09ab4de | ||
|
|
6acf28715b | ||
|
|
adc69b872c | ||
|
|
02a0a7f428 | ||
|
|
e373bb84d5 | ||
|
|
802adf3b65 | ||
|
|
b39e9257eb | ||
|
|
cb90d09c9d | ||
|
|
3a8b9cc81e | ||
|
|
7cb8ed8271 | ||
|
|
efbe25a678 | ||
|
|
67468a0c46 | ||
|
|
935bec447b | ||
|
|
65bc460323 | ||
|
|
e4f5039509 | ||
|
|
97373b7786 | ||
|
|
17e2b4f814 | ||
|
|
06c73df55a | ||
|
|
bc550e22d7 | ||
|
|
fdbbbf33ca | ||
|
|
902f1e5fdc | ||
|
|
0160435802 | ||
|
|
a28cc6ebec | ||
|
|
17900e39d7 | ||
|
|
d1aa15688a | ||
|
|
7c7a32efd7 | ||
|
|
f5c4fcc250 | ||
|
|
3532f52f4b | ||
|
|
c7a2e4e90a | ||
|
|
41291b6290 | ||
|
|
f62e03b3d2 | ||
|
|
487f6380d0 | ||
|
|
e1359c904c | ||
|
|
82a6e4efe5 | ||
|
|
760a530305 | ||
|
|
1911320c86 | ||
|
|
8eddccfbb4 | ||
|
|
837f6f0975 | ||
|
|
ec5b72c879 | ||
|
|
ac65c6b178 | ||
|
|
41f7940f97 | ||
|
|
7ca783dee9 | ||
|
|
48228031e4 | ||
|
|
98d0f81f21 | ||
|
|
543f218fe9 | ||
|
|
2291958648 | ||
|
|
5424632ba3 | ||
|
|
d7897e0d70 | ||
|
|
8a0bb4bf17 | ||
|
|
d024fcf37f | ||
|
|
5e38dde18d | ||
|
|
f42ec79958 | ||
|
|
c5b5895162 | ||
|
|
3d57cb3234 | ||
|
|
255bede1f2 | ||
|
|
fc2e7a30b3 | ||
|
|
48f0aa8483 | ||
|
|
5034aa0773 | ||
|
|
090cb2c9de | ||
|
|
a7800cdb95 | ||
|
|
2cd9cda12c | ||
|
|
0cf55ded34 | ||
|
|
06d2d933fb | ||
|
|
752f89f13f | ||
|
|
93b8bf66aa | ||
|
|
1831c731a3 | ||
|
|
e1fb9d9230 | ||
|
|
348482c575 | ||
|
|
0b0776a440 | ||
|
|
334a739ff6 | ||
|
|
0715b4e121 | ||
|
|
389f34cb57 | ||
|
|
3a15c9ffb3 | ||
|
|
2b420b5c0a | ||
|
|
fb835ad658 | ||
|
|
d493da562e | ||
|
|
5d1ce9891b | ||
|
|
74fda0b311 | ||
|
|
95f12c7e28 | ||
|
|
e96b4410a1 | ||
|
|
c160a49908 | ||
|
|
a007a5a8a4 | ||
|
|
f3d47c3dc3 | ||
|
|
ba803a7cd2 | ||
|
|
81ddee4f3a | ||
|
|
fbab838dc0 | ||
|
|
cbe4a5c251 | ||
|
|
1ef6b7f32b | ||
|
|
20025d4fd6 | ||
|
|
3ffa8975d4 | ||
|
|
d3f919df3e | ||
|
|
4b7e6b36ce | ||
|
|
ae3107153c | ||
|
|
34a26397d7 | ||
|
|
75059f3feb | ||
|
|
727709da67 | ||
|
|
faee0e43d1 | ||
|
|
c8e6e47e2a | ||
|
|
657f3bdd21 | ||
|
|
3545633934 | ||
|
|
e9d86d7e52 | ||
|
|
aefd744abb | ||
|
|
aae06e003e | ||
|
|
84b3c1d3bc | ||
|
|
2ea03cf80d | ||
|
|
1fbd0dd9d8 | ||
|
|
8924fea33a | ||
|
|
8dfc874be3 | ||
|
|
61035419d5 | ||
|
|
2cbdb1db22 | ||
|
|
d389a4fcf3 | ||
|
|
ec7cac3641 | ||
|
|
0a28c8e91b | ||
|
|
98e73a4022 | ||
|
|
17c45d1206 | ||
|
|
85bf63078c | ||
|
|
7d4873bcef | ||
|
|
a79a439dac | ||
|
|
4c25ee3597 | ||
|
|
ebd76588da | ||
|
|
9b248e3b2f |
4
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -60,8 +60,8 @@ body:
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
|
||||
26
.github/workflows/check-licenses.yml
vendored
@@ -14,13 +14,25 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
|
||||
- name: Check License
|
||||
run: |
|
||||
make check-licenses
|
||||
run: make check-licenses
|
||||
|
||||
4
.github/workflows/codeql-analysis-js.yml
vendored
@@ -36,11 +36,11 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "javascript"
|
||||
|
||||
25
.github/workflows/codeql-analysis.yml
vendored
@@ -55,16 +55,27 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -75,7 +86,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
@@ -89,4 +100,4 @@ jobs:
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
uses: github/codeql-action/analyze@v3
|
||||
|
||||
100
.github/workflows/main.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
@@ -14,6 +16,8 @@ on:
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "dashboards/**"
|
||||
- "deployment/**.yml"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -30,18 +34,55 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Dependencies
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-all-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-all-
|
||||
|
||||
- name: Run check-all
|
||||
run: |
|
||||
make install-golangci-lint
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
build:
|
||||
needs: lint
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-crossbuild-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-crossbuild-
|
||||
|
||||
- name: Build
|
||||
run: make crossbuild
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
@@ -54,43 +95,26 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
id: go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
run: make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
2
.github/workflows/sync-docs.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
- 'docs/**'
|
||||
workflow_dispatch: {}
|
||||
env:
|
||||
PAGEFIND_VERSION: "1.0.3"
|
||||
PAGEFIND_VERSION: "1.0.4"
|
||||
HUGO_VERSION: "latest"
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout and to commit back image update
|
||||
|
||||
1
.gitignore
vendored
@@ -22,3 +22,4 @@ Gemfile.lock
|
||||
/_site
|
||||
_site
|
||||
*.tmp
|
||||
/docs/.jekyll-metadata
|
||||
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2024 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
52
Makefile
@@ -1,5 +1,7 @@
|
||||
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
MAKE_CONCURRENCY ?= $(shell getconf _NPROCESSORS_ONLN)
|
||||
MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
@@ -15,6 +17,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
include app/*/Makefile
|
||||
include docs/Makefile
|
||||
include deployment/*/Makefile
|
||||
include dashboards/Makefile
|
||||
include snap/local/Makefile
|
||||
@@ -163,12 +166,14 @@ vmutils-windows-amd64: \
|
||||
vmrestore-windows-amd64 \
|
||||
vmctl-windows-amd64
|
||||
|
||||
crossbuild:
|
||||
$(MAKE_PARALLEL) victoria-metrics-crossbuild vmutils-crossbuild
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-amd64 \
|
||||
victoria-metrics-linux-arm64 \
|
||||
victoria-metrics-linux-arm \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-ppc64le \
|
||||
victoria-metrics-darwin-amd64 \
|
||||
victoria-metrics-darwin-arm64 \
|
||||
@@ -180,7 +185,6 @@ vmutils-crossbuild: \
|
||||
vmutils-linux-amd64 \
|
||||
vmutils-linux-arm64 \
|
||||
vmutils-linux-arm \
|
||||
vmutils-linux-386 \
|
||||
vmutils-linux-ppc64le \
|
||||
vmutils-darwin-amd64 \
|
||||
vmutils-darwin-arm64 \
|
||||
@@ -190,14 +194,15 @@ vmutils-crossbuild: \
|
||||
|
||||
publish-release:
|
||||
rm -rf bin/*
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
git checkout $(TAG) && $(MAKE) release && LATEST_TAG=stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release && LATEST_TAG=cluster-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release && LATEST_TAG=enterprise-stable $(MAKE) publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release && LATEST_TAG=enterprise-cluster-stable $(MAKE) publish
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
release:
|
||||
$(MAKE_PARALLEL) \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-386 \
|
||||
@@ -256,16 +261,16 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
|
||||
cd bin && rm -rf \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe
|
||||
|
||||
release-victoria-logs: \
|
||||
release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
release-victoria-logs:
|
||||
$(MAKE_PARALLEL) release-victoria-logs-linux-386 \
|
||||
release-victoria-logs-linux-amd64 \
|
||||
release-victoria-logs-linux-arm \
|
||||
release-victoria-logs-linux-arm64 \
|
||||
release-victoria-logs-darwin-amd64 \
|
||||
release-victoria-logs-darwin-arm64 \
|
||||
release-victoria-logs-freebsd-amd64 \
|
||||
release-victoria-logs-openbsd-amd64 \
|
||||
release-victoria-logs-windows-amd64
|
||||
|
||||
release-victoria-logs-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch
|
||||
@@ -529,12 +534,3 @@ copy-docs:
|
||||
docs-sync:
|
||||
SRC=README.md DST=docs/README.md OLD_URL='' ORDER=0 TITLE=VictoriaMetrics $(MAKE) copy-docs
|
||||
SRC=README.md DST=docs/Single-server-VictoriaMetrics.md OLD_URL='/Single-server-VictoriaMetrics.html' TITLE=VictoriaMetrics ORDER=1 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md OLD_URL='/vmagent.html' ORDER=3 TITLE=vmagent $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md OLD_URL='/vmalert.html' ORDER=4 TITLE=vmalert $(MAKE) copy-docs
|
||||
SRC=app/vmauth/README.md DST=docs/vmauth.md OLD_URL='/vmauth.html' ORDER=5 TITLE=vmauth $(MAKE) copy-docs
|
||||
SRC=app/vmbackup/README.md DST=docs/vmbackup.md OLD_URL='/vmbackup.html' ORDER=6 TITLE=vmbackup $(MAKE) copy-docs
|
||||
SRC=app/vmrestore/README.md DST=docs/vmrestore.md OLD_URL='/vmrestore.html' ORDER=7 TITLE=vmrestore $(MAKE) copy-docs
|
||||
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
|
||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
|
||||
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
|
||||
SRC=app/vmalert-tool/README.md DST=docs/vmalert-tool.md OLD_URL='' ORDER=12 TITLE=vmalert-tool $(MAKE) copy-docs
|
||||
|
||||
@@ -37,7 +37,6 @@ func main() {
|
||||
cgroup.SetGOGC(*gogc)
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
logger.Infof("starting VictoriaLogs at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
@@ -49,8 +48,10 @@ func main() {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
|
||||
startTime = time.Now()
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -48,7 +48,6 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
*dryRun = true
|
||||
@@ -74,13 +73,16 @@ func main() {
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
|
||||
startSelfScraper()
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
stopSelfScraper()
|
||||
|
||||
@@ -89,8 +91,8 @@ func main() {
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
vminsert.Stop()
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
vminsert.Stop()
|
||||
|
||||
vmstorage.Stop()
|
||||
vmselect.Stop()
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -54,15 +55,14 @@ var (
|
||||
)
|
||||
|
||||
type test struct {
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
ResultQueryRange QueryRange `json:"result_query_range"`
|
||||
Issue string `json:"issue"`
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
ResultQuery Query `json:"result_query"`
|
||||
Issue string `json:"issue"`
|
||||
}
|
||||
|
||||
type Metric struct {
|
||||
@@ -80,42 +80,90 @@ type Series struct {
|
||||
Status string `json:"status"`
|
||||
Data []map[string]string `json:"data"`
|
||||
}
|
||||
|
||||
type Query struct {
|
||||
Status string `json:"status"`
|
||||
Data QueryData `json:"data"`
|
||||
}
|
||||
type QueryData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryDataResult `json:"result"`
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result json.RawMessage `json:"result"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type QueryDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Value []interface{} `json:"value"`
|
||||
const rtVector, rtMatrix = "vector", "matrix"
|
||||
|
||||
func (q *Query) metrics() ([]Metric, error) {
|
||||
switch q.Data.ResultType {
|
||||
case rtVector:
|
||||
var r QueryInstant
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
case rtMatrix:
|
||||
var r QueryRange
|
||||
if err := json.Unmarshal(q.Data.Result, &r.Result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.metrics()
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown result type %q", q.Data.ResultType)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *QueryDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
type QueryInstant struct {
|
||||
Result []struct {
|
||||
Labels map[string]string `json:"metric"`
|
||||
TV [2]interface{} `json:"value"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
func (q QueryInstant) metrics() ([]Metric, error) {
|
||||
result := make([]Metric, len(q.Result))
|
||||
for i, res := range q.Result {
|
||||
f, err := strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
var m Metric
|
||||
m.Metric = res.Labels
|
||||
m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
|
||||
m.Values = append(m.Values, f)
|
||||
result[i] = m
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type QueryRange struct {
|
||||
Status string `json:"status"`
|
||||
Data QueryRangeData `json:"data"`
|
||||
}
|
||||
type QueryRangeData struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result []QueryRangeDataResult `json:"result"`
|
||||
Result []struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
type QueryRangeDataResult struct {
|
||||
Metric map[string]string `json:"metric"`
|
||||
Values [][]interface{} `json:"values"`
|
||||
func (q QueryRange) metrics() ([]Metric, error) {
|
||||
var result []Metric
|
||||
for i, res := range q.Result {
|
||||
var m Metric
|
||||
for _, tv := range res.Values {
|
||||
f, err := strconv.ParseFloat(tv[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, tv[1], err)
|
||||
}
|
||||
m.Values = append(m.Values, f)
|
||||
m.Timestamps = append(m.Timestamps, int64(tv[0].(float64)))
|
||||
}
|
||||
if len(m.Values) < 1 || len(m.Timestamps) < 1 {
|
||||
return nil, fmt.Errorf("metric %v contains no values", res)
|
||||
}
|
||||
m.Metric = q.Result[i].Metric
|
||||
result = append(result, m)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (r *QueryRangeDataResult) UnmarshalJSON(b []byte) error {
|
||||
type plain QueryRangeDataResult
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(r))
|
||||
func (q *Query) UnmarshalJSON(b []byte) error {
|
||||
type plain Query
|
||||
return json.Unmarshal(testutil.PopulateTimeTpl(b, insertionTime), (*plain)(q))
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
@@ -197,6 +245,9 @@ func TestWriteRead(t *testing.T) {
|
||||
func testWrite(t *testing.T) {
|
||||
t.Run("prometheus", func(t *testing.T) {
|
||||
for _, test := range readIn("prometheus", t, insertionTime) {
|
||||
if test.Data == nil {
|
||||
continue
|
||||
}
|
||||
s := newSuite(t)
|
||||
r := testutil.WriteRequest{}
|
||||
s.noError(json.Unmarshal([]byte(strings.Join(test.Data, "\n")), &r.Timeseries))
|
||||
@@ -272,17 +323,19 @@ func testRead(t *testing.T) {
|
||||
if err := checkSeriesResult(s, test.ResultSeries); err != nil {
|
||||
t.Fatalf("Series. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query_range"):
|
||||
queryResult := QueryRange{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryRangeResult(queryResult, test.ResultQueryRange); err != nil {
|
||||
t.Fatalf("Query Range. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
case strings.HasPrefix(q, "/api/v1/query"):
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
gotMetrics, err := queryResult.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse query response: %s", err)
|
||||
}
|
||||
expMetrics, err := test.ResultQuery.metrics()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse expected response: %s", err)
|
||||
}
|
||||
if err := checkMetricsResult(gotMetrics, expMetrics); err != nil {
|
||||
t.Fatalf("%q fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
@@ -417,60 +470,6 @@ func removeIfFoundSeries(r map[string]string, contains []map[string]string) []ma
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryResult(got, want Query) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryData(r QueryDataResult, contains []QueryDataResult) []QueryDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Value[0], item.Value[0]) && reflect.DeepEqual(r.Value[1], item.Value[1]) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
func checkQueryRangeResult(got, want QueryRange) error {
|
||||
if got.Status != want.Status {
|
||||
return fmt.Errorf("status mismatch %q - %q", want.Status, got.Status)
|
||||
}
|
||||
if got.Data.ResultType != want.Data.ResultType {
|
||||
return fmt.Errorf("result type mismatch %q - %q", want.Data.ResultType, got.Data.ResultType)
|
||||
}
|
||||
wantData := append([]QueryRangeDataResult(nil), want.Data.Result...)
|
||||
for _, r := range got.Data.Result {
|
||||
wantData = removeIfFoundQueryRangeData(r, wantData)
|
||||
}
|
||||
if len(wantData) > 0 {
|
||||
return fmt.Errorf("expected query range result %+v not found in %+v", wantData, got.Data.Result)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeIfFoundQueryRangeData(r QueryRangeDataResult, contains []QueryRangeDataResult) []QueryRangeDataResult {
|
||||
for i, item := range contains {
|
||||
if reflect.DeepEqual(r.Metric, item.Metric) && reflect.DeepEqual(r.Values, item.Values) {
|
||||
contains[i] = contains[len(contains)-1]
|
||||
return contains[:len(contains)-1]
|
||||
}
|
||||
}
|
||||
return contains
|
||||
}
|
||||
|
||||
// suite holds the *testing.T of the test it was created for.
type suite struct {
	t *testing.T
}
|
||||
|
||||
// newSuite returns a new suite bound to t.
func newSuite(t *testing.T) *suite {
	s := &suite{}
	s.t = t
	return s
}
|
||||
|
||||
@@ -98,7 +98,7 @@ func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
|
||||
dst = append(dst, prompb.Label{})
|
||||
}
|
||||
lb := &dst[len(dst)-1]
|
||||
lb.Name = bytesutil.ToUnsafeBytes(key)
|
||||
lb.Value = bytesutil.ToUnsafeBytes(value)
|
||||
lb.Name = key
|
||||
lb.Value = value
|
||||
return dst
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"not_nan_not_inf;item=y 3 {TIME_S-1m}",
|
||||
"not_nan_not_inf;item=y 1 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query_range?query=1/(not_nan_not_inf-1)!=inf!=nan&start={TIME_S-3m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"empty_label_match;foo=bar 2 {TIME_S-1m}",
|
||||
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S-1m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_set 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_set&start={TIME_S-150s}&end={TIME_S}&step=10s&max_lookback=1s"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_set"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"max_lookback_unset 4 {TIME_S-150s}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=max_lookback_unset&start={TIME_S-150s}&end={TIME_S}&step=10s"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"max_lookback_unset"},"values":[
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"not_nan_as_missing_data;item=y 3 {TIME_S-1m}"
|
||||
],
|
||||
"query": ["/api/v1/query_range?query=not_nan_as_missing_data>1&start={TIME_S-2m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
|
||||
12
app/victoria-metrics/testdata/prometheus/instant-matrix.json
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "instant query with look-behind window",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo[5m]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
11
app/victoria-metrics/testdata/prometheus/instant-scalar.json
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"name": "instant scalar query",
|
||||
"query": ["/api/v1/query?query=42&time={TIME_S}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"vector",
|
||||
"result":[{"metric":{},"value":["{TIME_S}", "42"]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
13
app/victoria-metrics/testdata/prometheus/issue-5553-too-big-lookback.json
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "too big look-behind window",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"foo\"},{\"name\":\"issue\",\"value\":\"5553\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}]}]"],
|
||||
"query": ["/api/v1/query?query=foo{issue=\"5553\"}[100y]"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[{"metric":{"__name__":"foo", "issue": "5553"},"values":[["{TIME_S-60s}", "1"]]}]
|
||||
}
|
||||
}
|
||||
}
|
||||
18
app/victoria-metrics/testdata/prometheus/query-range.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "query range",
|
||||
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5553",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"bar\"}],\"samples\":[{\"value\":1,\"timestamp\":\"{TIME_MS-60s}\"}, {\"value\":2,\"timestamp\":\"{TIME_MS-120s}\"}, {\"value\":1,\"timestamp\":\"{TIME_MS-180s}\"}]}]"],
|
||||
"query": ["/api/v1/query_range?query=bar&step=30s&start={TIME_MS-180s}"],
|
||||
"result_query": {
|
||||
"status": "success",
|
||||
"data":{
|
||||
"resultType":"matrix",
|
||||
"result":[
|
||||
{
|
||||
"metric":{"__name__":"bar"},
|
||||
"values":[["{TIME_S-180s}", "1"],["{TIME_S-150s}", "1"],["{TIME_S-120s}", "2"],["{TIME_S-90s}", "2"], ["{TIME_S-60s}", "1"], ["{TIME_S-30s}", "1"], ["{TIME_S}", "1"]]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,7 +105,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
|
||||
vlstorage.MustAddRows(lr)
|
||||
logstorage.PutLogRows(lr)
|
||||
if err != nil {
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
|
||||
logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
@@ -65,7 +65,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package datadog
|
||||
package datadogv1
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
@@ -8,33 +8,32 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv1/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv1"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv1"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv1"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v1/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []datadog.Series) error {
|
||||
return stream.Parse(req.Body, ce, func(series []datadogv1.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmarshal.Label) error {
|
||||
func insertRows(at *auth.Token, series []datadogv1.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -63,7 +62,7 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadog.SplitTag(tag)
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
@@ -88,7 +87,9 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
102
app/vmagent/datadogv2/request_handler.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package datadogv2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadogv2/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// Metrics for rows ingested via the DataDog v2 protocol (type="datadogv2").
var (
|
||||
// rowsInserted is a counter registered as vmagent_rows_inserted_total.
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadogv2"}`)
|
||||
// rowsTenantInserted is a counter map registered as vmagent_tenant_inserted_rows_total.
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadogv2"}`)
|
||||
// rowsPerInsert is a histogram registered as vmagent_rows_per_insert.
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadogv2"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for DataDog POST /api/v2/series request.
|
||||
//
|
||||
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ct := req.Header.Get("Content-Type")
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, ct, func(series []datadogv2.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, series []datadogv2.Series, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := 0
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range series {
|
||||
ss := &series[i]
|
||||
rowsTotal += len(ss.Points)
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
for _, rs := range ss.Resources {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: rs.Type,
|
||||
Value: rs.Name,
|
||||
})
|
||||
}
|
||||
if ss.SourceTypeName != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "source_type_name",
|
||||
Value: ss.SourceTypeName,
|
||||
})
|
||||
}
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := datadogutils.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for _, pt := range ss.Points {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Timestamp: pt.Timestamp * 1000,
|
||||
Value: pt.Value,
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
@@ -20,10 +21,12 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, insertRows)
|
||||
return stream.Parse(r, false, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(at *auth.Token, rows []parser.Row) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -56,7 +59,9 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -36,9 +36,9 @@ var (
|
||||
// InsertHandlerForReader processes remote write for influx line protocol.
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
|
||||
return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
return insertRows(at, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -130,7 +130,9 @@ func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prom
|
||||
ctx.ctx.Labels = labels
|
||||
ctx.ctx.Samples = samples
|
||||
ctx.commonLabels = commonLabels
|
||||
remotewrite.Push(at, &ctx.ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -11,10 +11,9 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv1"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadogv2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
|
||||
@@ -42,12 +41,13 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -69,7 +69,8 @@ var (
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
|
||||
@@ -96,7 +97,6 @@ func main() {
|
||||
remotewrite.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
@@ -125,7 +125,7 @@ func main() {
|
||||
common.StartUnmarshalWorkers()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
return influx.InsertHandlerForReader(nil, r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
@@ -140,15 +140,17 @@ func main() {
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.Push)
|
||||
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
}
|
||||
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
if len(*httpListenAddr) > 0 {
|
||||
@@ -344,9 +346,20 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v1/series":
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "/datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -409,7 +422,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/prometheus/config", "/config":
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey.Get(), "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigRequests.Inc()
|
||||
@@ -418,7 +431,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey.Get(), "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeStatusConfigRequests.Inc()
|
||||
@@ -428,6 +441,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%q}}`, bb.B)
|
||||
return true
|
||||
case "/prometheus/-/reload", "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey.Get(), "reloadAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigReloadRequests.Inc()
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
@@ -567,9 +583,19 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v1/series":
|
||||
datadogWriteRequests.Inc()
|
||||
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogWriteErrors.Inc()
|
||||
datadogv1WriteRequests.Inc()
|
||||
if err := datadogv1.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv1WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(202)
|
||||
fmt.Fprintf(w, `{"status":"ok"}`)
|
||||
return true
|
||||
case "datadog/api/v2/series":
|
||||
datadogv2WriteRequests.Inc()
|
||||
if err := datadogv2.InsertHandlerForHTTP(at, r); err != nil {
|
||||
datadogv2WriteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
@@ -627,8 +653,11 @@ var (
|
||||
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
|
||||
|
||||
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
datadogv1WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
|
||||
|
||||
datadogv2WriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
datadogv2WriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v2/series", protocol="datadog"}`)
|
||||
|
||||
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
|
||||
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
|
||||
|
||||
@@ -84,6 +84,8 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
|
||||
@@ -59,7 +59,9 @@ func insertRows(at *auth.Token, tss []prompbmarshal.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -56,7 +56,9 @@ func insertRows(rows []parser.Row) error {
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(nil, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -64,7 +64,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
|
||||
@@ -73,7 +73,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(len(rows))
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
@@ -48,8 +47,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
for i := range ts.Labels {
|
||||
label := &ts.Labels[i]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(label.Name),
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
@@ -69,7 +68,9 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -58,8 +59,10 @@ var (
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
|
||||
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
|
||||
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
|
||||
@@ -234,10 +237,16 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
@@ -305,7 +314,7 @@ func (c *client) runWorker() {
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
@@ -314,11 +323,11 @@ func (c *client) runWorker() {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -387,7 +396,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(time.Minute)
|
||||
retryDuration := timeutil.AddJitterToDuration(time.Second)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
@@ -397,8 +407,8 @@ again:
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
logger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
@@ -444,8 +454,8 @@ again:
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
@@ -37,9 +39,9 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.fq = fq
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
@@ -57,10 +59,11 @@ func (ps *pendingSeries) MustStop() {
|
||||
ps.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
func (ps *pendingSeries) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
ps.mu.Lock()
|
||||
ps.wr.push(tss)
|
||||
ok := ps.wr.tryPush(tss)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
@@ -68,20 +71,23 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
d := timeutil.AddJitterToDuration(*flushInterval)
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
mustStop := false
|
||||
for !mustStop {
|
||||
for {
|
||||
select {
|
||||
case <-ps.stopCh:
|
||||
mustStop = true
|
||||
ps.mu.Lock()
|
||||
ps.wr.mustFlushOnStop()
|
||||
ps.mu.Unlock()
|
||||
return
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ps.mu.Lock()
|
||||
ps.wr.flush()
|
||||
_ = ps.wr.tryFlush()
|
||||
ps.mu.Unlock()
|
||||
}
|
||||
}
|
||||
@@ -90,29 +96,30 @@ type writeRequest struct {
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
// How many significant figures must be left before sending the writeRequest to fq.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
// How many decimal digits after point must be left before sending the writeRequest to fq.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
labels []prompbmarshal.Label
|
||||
samples []prompbmarshal.Sample
|
||||
buf []byte
|
||||
|
||||
// buf holds labels data
|
||||
buf []byte
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
@@ -130,23 +137,40 @@ func (wr *writeRequest) reset() {
|
||||
wr.buf = wr.buf[:0]
|
||||
}
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
// mustFlushOnStop force pushes wr data into wr.fq
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (wr *writeRequest) mustFlushOnStop() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite) {
|
||||
logger.Panicf("BUG: final flush must always return true")
|
||||
}
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
|
||||
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryFlush() bool {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite) {
|
||||
return false
|
||||
}
|
||||
wr.reset()
|
||||
return true
|
||||
}
|
||||
|
||||
func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roundDigits int) {
|
||||
if n := significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
if n := roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
@@ -154,21 +178,27 @@ func (wr *writeRequest) adjustSampleValues() {
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
func (wr *writeRequest) tryPush(src []prompbmarshal.TimeSeries) bool {
|
||||
tssDst := wr.tss
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
|
||||
wr.tss = tssDst
|
||||
wr.flush()
|
||||
if !wr.tryFlush() {
|
||||
return false
|
||||
}
|
||||
tssDst = wr.tss
|
||||
}
|
||||
tsSrc := &src[i]
|
||||
adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
|
||||
}
|
||||
|
||||
wr.tss = tssDst
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
@@ -196,31 +226,45 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
|
||||
// marshalConcurrency limits the maximum number of concurrent workers, which marshal and compress WriteRequest.
|
||||
var marshalConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs())
|
||||
|
||||
func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
marshalConcurrencyCh <- struct{}{}
|
||||
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
bb.B = wr.MarshalProtobuf(bb.B[:0])
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := snappyBufPool.Get()
|
||||
zb := compressBufPool.Get()
|
||||
if isVMRemoteWrite {
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, *vmProtoCompressLevel)
|
||||
} else {
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
<-marshalConcurrencyCh
|
||||
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
pushBlock(zb.B)
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockSizeBytes.Update(float64(len(zb.B)))
|
||||
snappyBufPool.Put(zb)
|
||||
return
|
||||
zbLen := len(zb.B)
|
||||
ok := tryPushBlock(zb.B)
|
||||
compressBufPool.Put(zb)
|
||||
if ok {
|
||||
blockSizeRows.Update(float64(len(wr.Timeseries)))
|
||||
blockSizeBytes.Update(float64(zbLen))
|
||||
}
|
||||
return ok
|
||||
}
|
||||
snappyBufPool.Put(zb)
|
||||
compressBufPool.Put(zb)
|
||||
} else {
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
<-marshalConcurrencyCh
|
||||
}
|
||||
|
||||
// Too big block. Recursively split it into smaller parts if possible.
|
||||
@@ -229,23 +273,36 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
samples := wr.Timeseries[0].Samples
|
||||
if len(samples) == 1 {
|
||||
logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
|
||||
return
|
||||
return true
|
||||
}
|
||||
n := len(samples) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return false
|
||||
}
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return
|
||||
return true
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries[n:]
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
|
||||
wr.Timeseries = timeseries
|
||||
return false
|
||||
}
|
||||
wr.Timeseries = timeseries
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -253,5 +310,7 @@ var (
|
||||
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
|
||||
)
|
||||
|
||||
var writeRequestBufPool bytesutil.ByteBufferPool
|
||||
var snappyBufPool bytesutil.ByteBufferPool
|
||||
var (
|
||||
writeRequestBufPool bytesutil.ByteBufferPool
|
||||
compressBufPool bytesutil.ByteBufferPool
|
||||
)
|
||||
|
||||
@@ -26,13 +26,16 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
pushBlock := func(block []byte) bool {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
return true
|
||||
}
|
||||
if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
|
||||
t.Fatalf("cannot push data to to remote storage")
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
@@ -40,7 +43,7 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 0)
|
||||
f(false, expectedBlockLenProm, 3)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/klauspost/compress/s2"
|
||||
)
|
||||
@@ -22,7 +21,7 @@ func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []by
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(rowsCount))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
@@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
rctx.reset()
|
||||
tssDst := tss[:0]
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
@@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
if len(extraLabels) == 0 {
|
||||
return
|
||||
}
|
||||
rctx.reset()
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
@@ -139,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
|
||||
rctx.reset()
|
||||
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
|
||||
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
|
||||
labels := rctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labelsLen := len(labels)
|
||||
for _, label := range ts.Labels {
|
||||
labelName := label.Name
|
||||
if labelName == "vm_account_id" || labelName == "vm_project_id" {
|
||||
continue
|
||||
}
|
||||
labels = append(labels, label)
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_account_id",
|
||||
Value: accountIDStr,
|
||||
})
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "vm_project_id",
|
||||
Value: projectIDStr,
|
||||
})
|
||||
ts.Labels = labels[labelsLen:]
|
||||
}
|
||||
rctx.labels = labels
|
||||
}
|
||||
|
||||
type relabelCtx struct {
|
||||
// pool for labels, which are used during the relabeling.
|
||||
labels []prompbmarshal.Label
|
||||
@@ -160,7 +191,7 @@ func getRelabelCtx() *relabelCtx {
|
||||
}
|
||||
|
||||
func putRelabelCtx(rctx *relabelCtx) {
|
||||
rctx.labels = rctx.labels[:0]
|
||||
rctx.reset()
|
||||
relabelCtxPool.Put(rctx)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -10,6 +11,8 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -34,18 +37,22 @@ var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
|
||||
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
|
||||
"See also -remoteWrite.multitenantURL")
|
||||
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
|
||||
"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
|
||||
enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
|
||||
"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
|
||||
"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
|
||||
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
|
||||
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
@@ -84,6 +91,11 @@ var (
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
|
||||
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
|
||||
disableOnDiskQueue = flag.Bool("remoteWrite.disableOnDiskQueue", false, "Whether to disable storing pending data to -remoteWrite.tmpDataPath "+
|
||||
"when the configured remote storage systems cannot keep up with the data ingestion rate. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence ."+
|
||||
"See also -remoteWrite.dropSamplesOnOverload")
|
||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||
"cannot be pushed into the configured remote storage systems in a timely manner. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -96,11 +108,19 @@ var (
|
||||
|
||||
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
|
||||
defaultAuthToken = &auth.Token{}
|
||||
|
||||
// ErrQueueFullHTTPRetry must be returned when TryPush() returns false.
|
||||
ErrQueueFullHTTPRetry = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("remote storage systems cannot keep up with the data ingestion rate; retry the request later " +
|
||||
"or remove -remoteWrite.disableOnDiskQueue from vmagent command-line flags, so it could save pending data to -remoteWrite.tmpDataPath; " +
|
||||
"see https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence"),
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
}
|
||||
)
|
||||
|
||||
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
|
||||
// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
|
||||
func MultitenancyEnabled() bool {
|
||||
return len(*remoteWriteMultitenantURLs) > 0
|
||||
return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
|
||||
}
|
||||
|
||||
// Contains the current relabelConfigs.
|
||||
@@ -183,6 +203,7 @@ func Init() {
|
||||
if len(*remoteWriteURLs) > 0 {
|
||||
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
|
||||
}
|
||||
dropDanglingQueues()
|
||||
|
||||
// Start config reloader.
|
||||
configReloaderWG.Add(1)
|
||||
@@ -200,6 +221,42 @@ func Init() {
|
||||
}()
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
if *keepDanglingQueues {
|
||||
return
|
||||
}
|
||||
if len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Do not drop dangling queues for *remoteWriteMultitenantURLs, since it is impossible to determine
|
||||
// unused queues for multitenant urls - they are created on demand when new sample for the given
|
||||
// tenant is pushed to remote storage.
|
||||
return
|
||||
}
|
||||
// Remove dangling persistent queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
//
|
||||
existingQueues := make(map[string]struct{}, len(rwctxsDefault))
|
||||
for _, rwctx := range rwctxsDefault {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsDefault))
|
||||
}
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
relabelConfigReloads.Inc()
|
||||
logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
@@ -219,7 +276,7 @@ func reloadRelabelConfigs() {
|
||||
var (
|
||||
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
relabelConfigSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
relabelConfigSuccess = metrics.NewGauge(`vmagent_relabel_config_last_reload_successful`, nil)
|
||||
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -273,33 +330,6 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
}
|
||||
|
||||
if !*keepDanglingQueues {
|
||||
// Remove dangling queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
existingQueues := make(map[string]struct{}, len(rwctxs))
|
||||
for _, rwctx := range rwctxs {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxs))
|
||||
}
|
||||
}
|
||||
|
||||
return rwctxs
|
||||
}
|
||||
|
||||
@@ -308,7 +338,7 @@ var configReloaderWG sync.WaitGroup
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls Push during and after the call to this func.
|
||||
// It is expected that nobody calls TryPush during and after the call to this func.
|
||||
func Stop() {
|
||||
close(configReloaderStopCh)
|
||||
configReloaderWG.Wait()
|
||||
@@ -318,7 +348,7 @@ func Stop() {
|
||||
}
|
||||
rwctxsDefault = nil
|
||||
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call Push during the Stop call.
|
||||
// There is no need in locking rwctxsMapLock here, since nobody should call TryPush during the Stop call.
|
||||
for _, rwctxs := range rwctxsMap {
|
||||
for _, rwctx := range rwctxs {
|
||||
rwctx.MustStop()
|
||||
@@ -334,24 +364,47 @@ func Stop() {
|
||||
}
|
||||
}
|
||||
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
|
||||
// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// Note that wr may be modified by Push because of relabeling and rounding.
|
||||
func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
|
||||
// PushDropSamplesOnFailure can modify wr contents.
|
||||
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
_ = tryPush(at, wr, true)
|
||||
}
|
||||
|
||||
// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// TryPush can modify wr contents, so the caller must re-initialize wr before calling TryPush() after unsuccessful attempt.
|
||||
// TryPush may send partial data from wr on unsuccessful attempt, so repeated call for the same wr may send the data multiple times.
|
||||
//
|
||||
// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
|
||||
func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
||||
return tryPush(at, wr, *dropSamplesOnOverload)
|
||||
}
|
||||
|
||||
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
|
||||
tss := wr.Timeseries
|
||||
|
||||
if at == nil && MultitenancyEnabled() {
|
||||
// Write data to default tenant if at isn't set when multitenancy is enabled.
|
||||
at = defaultAuthToken
|
||||
}
|
||||
|
||||
var tenantRctx *relabelCtx
|
||||
var rwctxs []*remoteWriteCtx
|
||||
if at == nil {
|
||||
rwctxs = rwctxsDefault
|
||||
} else if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
// Convert at to (vm_account_id, vm_project_id) labels.
|
||||
tenantRctx = getRelabelCtx()
|
||||
defer putRelabelCtx(tenantRctx)
|
||||
rwctxs = rwctxsDefault
|
||||
} else {
|
||||
if len(*remoteWriteMultitenantURLs) == 0 {
|
||||
logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
|
||||
}
|
||||
rwctxsMapLock.Lock()
|
||||
tenantID := tenantmetrics.TenantID{
|
||||
AccountID: at.AccountID,
|
||||
@@ -365,18 +418,37 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
rwctxsMapLock.Unlock()
|
||||
}
|
||||
|
||||
rowsCount := getRowsCount(tss)
|
||||
|
||||
if *disableOnDiskQueue {
|
||||
// Quick check whether writes to configured remote storage systems are blocked.
|
||||
// This allows saving CPU time spent on relabeling and block compression
|
||||
// if some of remote storage systems cannot keep up with the data ingestion rate.
|
||||
for _, rwctx := range rwctxs {
|
||||
if rwctx.fq.IsWriteBlocked() {
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
// Just drop samples
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcsGlobal := rcs.global
|
||||
if pcsGlobal.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
defer putRelabelCtx(rctx)
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
rowsCount := getRowsCount(tss)
|
||||
globalRowsPushedBeforeRelabel.Add(rowsCount)
|
||||
maxSamplesPerBlock := *maxRowsPerBlock
|
||||
// Allow up to 10x of labels per each block on average.
|
||||
maxLabelsPerBlock := 10 * maxSamplesPerBlock
|
||||
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
samplesCount := 0
|
||||
@@ -384,7 +456,7 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
labelsCount += len(tss[i].Labels)
|
||||
labelsCount += len(tss[i].Samples) * len(tss[i].Labels)
|
||||
i++
|
||||
if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
|
||||
break
|
||||
@@ -397,6 +469,9 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
} else {
|
||||
tss = nil
|
||||
}
|
||||
if tenantRctx != nil {
|
||||
tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
|
||||
}
|
||||
if rctx != nil {
|
||||
rowsCountBeforeRelabel := getRowsCount(tssBlock)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
|
||||
@@ -405,25 +480,35 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
}
|
||||
sortLabelsIfNeeded(tssBlock)
|
||||
tssBlock = limitSeriesCardinality(tssBlock)
|
||||
pushBlockToRemoteStorages(rwctxs, tssBlock)
|
||||
if rctx != nil {
|
||||
rctx.reset()
|
||||
if !tryPushBlockToRemoteStorages(rwctxs, tssBlock) {
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if dropSamplesOnFailure {
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
|
||||
var (
|
||||
samplesDropped = metrics.NewCounter(`vmagent_remotewrite_samples_dropped_total`)
|
||||
pushFailures = metrics.NewCounter(`vmagent_remotewrite_push_failures_total`)
|
||||
)
|
||||
|
||||
func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) bool {
|
||||
if len(tssBlock) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
if len(rwctxs) == 1 {
|
||||
// Fast path - just push data to the configured single remote storage
|
||||
rwctxs[0].Push(tssBlock)
|
||||
return
|
||||
return rwctxs[0].TryPush(tssBlock)
|
||||
}
|
||||
|
||||
// We need to push tssBlock to multiple remote storages.
|
||||
@@ -452,6 +537,7 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for i, rwctx := range rwctxs {
|
||||
tssShard := tssByURL[i]
|
||||
if len(tssShard) == 0 {
|
||||
@@ -459,11 +545,13 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
}
|
||||
go func(rwctx *remoteWriteCtx, tss []prompbmarshal.TimeSeries) {
|
||||
defer wg.Done()
|
||||
rwctx.Push(tss)
|
||||
if !rwctx.TryPush(tss) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
}(rwctx, tssShard)
|
||||
}
|
||||
wg.Wait()
|
||||
return
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
}
|
||||
|
||||
// Replicate data among rwctxs.
|
||||
@@ -471,13 +559,17 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed uint64
|
||||
for _, rwctx := range rwctxs {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
rwctx.Push(tssBlock)
|
||||
if !rwctx.TryPush(tssBlock) {
|
||||
atomic.StoreUint64(&anyPushFailed, 1)
|
||||
}
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return atomic.LoadUint64(&anyPushFailed) == 0
|
||||
}
|
||||
|
||||
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
|
||||
@@ -597,13 +689,19 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, *disableOnDiskQueue)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
if fq.IsWriteBlocked() {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
@@ -625,7 +723,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
|
||||
pss[i] = newPendingSeries(fq, c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
@@ -642,7 +740,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
|
||||
}
|
||||
@@ -678,7 +776,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
@@ -716,7 +814,9 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
rwctx.pushInternal(tss)
|
||||
|
||||
// Try pushing the data to remote storage
|
||||
ok := rwctx.tryPushInternal(tss)
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
@@ -724,6 +824,8 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
@@ -743,7 +845,21 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
|
||||
return dst
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompbmarshal.TimeSeries) {
|
||||
if rwctx.tryPushInternal(tss) {
|
||||
return
|
||||
}
|
||||
if !*disableOnDiskQueue {
|
||||
logger.Panicf("BUG: tryPushInternal must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if *dropSamplesOnOverload {
|
||||
rowsCount := getRowsCount(tss)
|
||||
samplesDropped.Add(rowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) bool {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
if len(labelsGlobal) > 0 {
|
||||
@@ -757,13 +873,16 @@ func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
|
||||
ok := pss[idx].TryPush(tss)
|
||||
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
@@ -776,7 +895,7 @@ func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
|
||||
|
||||
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(at, &ctx.WriteRequest)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
|
||||
@@ -1,246 +1,3 @@
|
||||
See vmalert-tool docs [here](https://docs.victoriametrics.com/vmalert-tool.html).
|
||||
|
||||
# vmalert-tool
|
||||
|
||||
VMAlert command-line tool
|
||||
|
||||
## Unit testing for rules
|
||||
|
||||
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
|
||||
It will perform the following actions:
|
||||
* sets up an isolated VictoriaMetrics instance;
|
||||
* simulates the periodic ingestion of time series;
|
||||
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
|
||||
* checks whether the firing alerts or resulting recording rules match the expected results.
|
||||
|
||||
See below how to run vmalert-tool for unit tests:
|
||||
|
||||
```
|
||||
# Run vmalert-tool with one or multiple test files via --files cmd-line flag
|
||||
./vmalert-tool unittest --files test1.yaml --files test2.yaml
|
||||
```
|
||||
|
||||
vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
|
||||
except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
|
||||
validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
|
||||
which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||
|
||||
### Test file format
|
||||
|
||||
The configuration format for files specified in `--files` cmd-line flag is the following:
|
||||
|
||||
```yaml
|
||||
# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
|
||||
# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
|
||||
rule_files:
|
||||
[ - <string> ]
|
||||
|
||||
# The evaluation interval for rules specified in `rule_files`
|
||||
[ evaluation_interval: <duration> | default = 1m ]
|
||||
|
||||
# Groups listed below will be evaluated in the listed order.
|
||||
# Not all groups need to be mentioned; groups that are omitted will be evaluated in the order they are defined in rule_files.
|
||||
group_eval_order:
|
||||
[ - <string> ]
|
||||
|
||||
# The list of unit test files to be checked during evaluation.
|
||||
tests:
|
||||
[ - <test_group> ]
|
||||
```
|
||||
|
||||
#### `<test_group>`
|
||||
|
||||
```yaml
|
||||
# Interval between samples for input series
|
||||
interval: <duration>
|
||||
# Time series to persist into the database according to configured <interval> before running tests.
|
||||
input_series:
|
||||
[ - <series> ]
|
||||
|
||||
# Name of the test group, optional
|
||||
[ name: <string> ]
|
||||
|
||||
# Unit tests for alerting rules
|
||||
alert_rule_test:
|
||||
[ - <alert_test_case> ]
|
||||
|
||||
# Unit tests for MetricsQL expressions.
|
||||
metricsql_expr_test:
|
||||
[ - <metricsql_expr_test> ]
|
||||
|
||||
# External labels accessible for templating.
|
||||
external_labels:
|
||||
[ <labelname>: <string> ... ]
|
||||
|
||||
```
|
||||
|
||||
#### `<series>`
|
||||
|
||||
```yaml
|
||||
# series in the following format '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
series: <string>
|
||||
|
||||
# values support several special equations:
|
||||
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
|
||||
# Read this as series starts at a, then c further samples incrementing by b.
|
||||
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
|
||||
# Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
|
||||
# '_' represents a missing sample from scrape
|
||||
# 'stale' indicates a stale sample
|
||||
# Examples:
|
||||
# 1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
|
||||
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
|
||||
# 3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
|
||||
# 4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
|
||||
values: <string>
|
||||
```
|
||||
|
||||
#### `<alert_test_case>`
|
||||
|
||||
vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
|
||||
So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
|
||||
but there is no need to add them under `exp_alerts`.
|
||||
You can also pass `--disableAlertgroupLabel` to skip `alertgroup` check.
|
||||
|
||||
```yaml
|
||||
# The time elapsed from time=0s when this alerting rule should be checked.
|
||||
# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
|
||||
eval_time: <duration>
|
||||
|
||||
# Name of the group to be tested.
|
||||
groupname: <string>
|
||||
|
||||
# Name of the alert to be tested.
|
||||
alertname: <string>
|
||||
|
||||
# List of the expected alerts that are firing under the given alertname at
|
||||
# the given evaluation time. If you want to test if an alerting rule should
|
||||
# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
|
||||
exp_alerts:
|
||||
[ - <alert> ]
|
||||
```
|
||||
|
||||
#### `<alert>`
|
||||
|
||||
```yaml
|
||||
# These are the expanded labels and annotations of the expected alert.
|
||||
# Note: labels also include the labels of the sample associated with the alert
|
||||
exp_labels:
|
||||
[ <labelname>: <string> ]
|
||||
exp_annotations:
|
||||
[ <labelname>: <string> ]
|
||||
```
|
||||
|
||||
#### `<metricsql_expr_test>`
|
||||
|
||||
```yaml
|
||||
# Expression to evaluate
|
||||
expr: <string>
|
||||
|
||||
# The time elapsed from time=0s when this expression should be evaluated.
|
||||
eval_time: <duration>
|
||||
|
||||
# Expected samples at the given evaluation time.
|
||||
exp_samples:
|
||||
[ - <sample> ]
|
||||
```
|
||||
|
||||
#### `<sample>`
|
||||
|
||||
```yaml
|
||||
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
labels: <string>
|
||||
|
||||
# The expected value of the MetricsQL expression.
|
||||
value: <number>
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This is an example input file for unit testing which will pass.
|
||||
`test.yaml` is the test file which follows the syntax above and `rules.yaml` contains the alerting rules.
|
||||
|
||||
With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
|
||||
|
||||
#### `test.yaml`
|
||||
|
||||
```yaml
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'up{job="prometheus", instance="localhost:9090"}'
|
||||
values: "0+0x1440"
|
||||
|
||||
metricsql_expr_test:
|
||||
- expr: suquery_interval_test
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
- labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
|
||||
value: 1
|
||||
|
||||
alert_rule_test:
|
||||
- eval_time: 2h
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: prometheus
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
datacenter: dc-123
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: AlwaysFiring
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
datacenter: dc-123
|
||||
|
||||
- eval_time: 0
|
||||
groupname: group1
|
||||
alertname: InstanceDown
|
||||
exp_alerts: []
|
||||
|
||||
external_labels:
|
||||
datacenter: dc-123
|
||||
```
|
||||
|
||||
#### `rules.yaml`
|
||||
|
||||
```yaml
|
||||
# This is the rules file.
|
||||
|
||||
groups:
|
||||
- name: group1
|
||||
rules:
|
||||
- alert: InstanceDown
|
||||
expr: up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Instance {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||
- alert: AlwaysFiring
|
||||
expr: 1
|
||||
|
||||
- name: group2
|
||||
rules:
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
||||
- record: suquery_interval_test
|
||||
expr: count_over_time(up[5m:])
|
||||
```
|
||||
vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmalert-tool.md).
|
||||
|
||||
@@ -21,6 +21,11 @@ tests:
|
||||
groupname: group2
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts: []
|
||||
- eval_time: 150s
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
exp_alerts:
|
||||
- {}
|
||||
- eval_time: 6m
|
||||
groupname: group1
|
||||
alertname: SameAlertNameWithDifferentGroup
|
||||
|
||||
@@ -336,7 +336,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if disableAlertgroupLabel {
|
||||
g.Name = ""
|
||||
}
|
||||
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name]; !ok {
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name]; !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := gotAlertsMap[g.Name]; !ok {
|
||||
@@ -347,7 +347,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if !isAlertRule {
|
||||
continue
|
||||
}
|
||||
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name][ar.Name]; ok {
|
||||
if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name][ar.Name]; ok {
|
||||
for _, got := range ar.GetAlerts() {
|
||||
if got.State != notifier.StateFiring {
|
||||
continue
|
||||
|
||||
@@ -68,6 +68,7 @@ publish-vmalert:
|
||||
|
||||
test-vmalert:
|
||||
go test -v -race -cover ./app/vmalert -loggerLevel=ERROR
|
||||
go test -v -race -cover ./app/vmalert/rule
|
||||
go test -v -race -cover ./app/vmalert/templates
|
||||
go test -v -race -cover ./app/vmalert/datasource
|
||||
go test -v -race -cover ./app/vmalert/notifier
|
||||
|
||||
@@ -22,6 +22,7 @@ groups:
|
||||
{{ . | first | value }}
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
link: http://localhost:3000/d/wNf0q_kZk?viewPanel=51&from={{($activeAt.Add (parseDurationTime "1h")).UnixMilli}}&to={{($activeAt.Add (parseDurationTime "-1h")).UnixMilli}}
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
update_entries_limit: -1
|
||||
expr: sum by(job)
|
||||
|
||||
@@ -37,11 +37,13 @@ var (
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", `Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used`)
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", `Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used`)
|
||||
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url. ")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url.")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url. ")
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
oauth2ClientID = flag.String("datasource.oauth2.clientID", "", "Optional OAuth2 clientID to use for -datasource.url")
|
||||
oauth2ClientSecret = flag.String("datasource.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -datasource.url")
|
||||
oauth2ClientSecretFile = flag.String("datasource.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -datasource.url")
|
||||
oauth2EndpointParams = flag.String("datasource.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -datasource.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side `+
|
||||
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
|
||||
@@ -108,10 +110,14 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
extraParams.Set("round_digits", fmt.Sprintf("%d", *roundDigits))
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
|
||||
@@ -59,7 +59,7 @@ absolute path to all .tpl files in root.
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -96,7 +96,6 @@ func main() {
|
||||
notifier.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
logger.Warnf("flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases.")
|
||||
@@ -118,9 +117,9 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
eu, err := getExternalURL(*externalURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init `external.url`: %s", err)
|
||||
logger.Fatalf("failed to init `-external.url`: %s", err)
|
||||
}
|
||||
|
||||
alertURLGeneratorFn, err = getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
@@ -182,8 +181,11 @@ func main() {
|
||||
rh := &requestHandler{m: manager}
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, rh.handler)
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
@@ -194,7 +196,7 @@ func main() {
|
||||
var (
|
||||
configReloads = metrics.NewCounter(`vmalert_config_last_reload_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmalert_config_last_reload_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmalert_config_last_reload_successful`)
|
||||
configSuccess = metrics.NewGauge(`vmalert_config_last_reload_successful`, nil)
|
||||
configTimestamp = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -243,14 +245,26 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
return manager, nil
|
||||
}
|
||||
|
||||
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
if externalURL != "" {
|
||||
return url.Parse(externalURL)
|
||||
func getExternalURL(customURL string) (*url.URL, error) {
|
||||
if customURL == "" {
|
||||
// use local hostname as external URL
|
||||
return getHostnameAsExternalURL(*httpListenAddr, httpserver.IsTLS())
|
||||
}
|
||||
hname, err := os.Hostname()
|
||||
u, err := url.Parse(customURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return nil, fmt.Errorf("invalid scheme %q in url %q, only 'http' and 'https' are supported", u.Scheme, u.String())
|
||||
}
|
||||
return u, nil
|
||||
}
|
||||
|
||||
func getHostnameAsExternalURL(httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
hname, err := os.Hostname()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get hostname: %w", err)
|
||||
}
|
||||
port := ""
|
||||
if ipport := strings.Split(httpListenAddr, ":"); len(ipport) > 1 {
|
||||
port = ":" + ipport[1]
|
||||
|
||||
@@ -22,22 +22,29 @@ func init() {
|
||||
}
|
||||
|
||||
func TestGetExternalURL(t *testing.T) {
|
||||
expURL := "https://vicotriametrics.com/path"
|
||||
u, err := getExternalURL(expURL, "", false)
|
||||
invalidURL := "victoriametrics.com/path"
|
||||
_, err := getExternalURL(invalidURL)
|
||||
if err == nil {
|
||||
t.Errorf("expected error, got nil")
|
||||
}
|
||||
|
||||
expURL := "https://victoriametrics.com/path"
|
||||
u, err := getExternalURL(expURL)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if u.String() != expURL {
|
||||
t.Errorf("unexpected url want %s, got %s", expURL, u.String())
|
||||
t.Errorf("unexpected url: want %q, got %s", expURL, u.String())
|
||||
}
|
||||
|
||||
h, _ := os.Hostname()
|
||||
expURL = fmt.Sprintf("https://%s:4242", h)
|
||||
u, err = getExternalURL("", "0.0.0.0:4242", true)
|
||||
expURL = fmt.Sprintf("http://%s:8880", h)
|
||||
u, err = getExternalURL("")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if u.String() != expURL {
|
||||
t.Errorf("unexpected url want %s, got %s", expURL, u.String())
|
||||
t.Errorf("unexpected url: want %s, got %s", expURL, u.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +141,7 @@ groups:
|
||||
t.Fatalf("expected to have config error %s; got nil instead", cErr)
|
||||
}
|
||||
if cfgSuc != 0 {
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 0; got %d instead", cfgSuc)
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 0; got %v instead", cfgSuc)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -143,7 +150,7 @@ groups:
|
||||
t.Fatalf("unexpected config error: %s", cErr)
|
||||
}
|
||||
if cfgSuc != 1 {
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 1; got %d instead", cfgSuc)
|
||||
t.Fatalf("expected to have metric configSuccess to be set to 1; got %v instead", cfgSuc)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -156,11 +156,14 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
var wg sync.WaitGroup
|
||||
for _, item := range toUpdate {
|
||||
wg.Add(1)
|
||||
// cancel evaluation so the Update will be applied as fast as possible.
|
||||
// it is important to call InterruptEval before the update, because cancel fn
|
||||
// can be re-assigned during the update.
|
||||
item.old.InterruptEval()
|
||||
go func(old *rule.Group, new *rule.Group) {
|
||||
old.UpdateWith(new)
|
||||
wg.Done()
|
||||
}(item.old, item.new)
|
||||
item.old.InterruptEval()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -191,7 +191,7 @@ func (a Alert) toPromLabels(relabelCfg *promrelabel.ParsedConfigs) []prompbmarsh
|
||||
var labels []prompbmarshal.Label
|
||||
for k, v := range a.Labels {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: k,
|
||||
Name: promrelabel.SanitizeMetricName(k),
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -237,6 +237,11 @@ func TestAlert_toPromLabels(t *testing.T) {
|
||||
[]prompbmarshal.Label{{Name: "a", Value: "baz"}, {Name: "foo", Value: "bar"}},
|
||||
nil,
|
||||
)
|
||||
fn(
|
||||
map[string]string{"foo.bar": "baz", "service!name": "qux"},
|
||||
[]prompbmarshal.Label{{Name: "foo_bar", Value: "baz"}, {Name: "service_name", Value: "qux"}},
|
||||
nil,
|
||||
)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
|
||||
@@ -144,7 +144,7 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
|
||||
aCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(ba.Username, ba.Password.String(), ba.PasswordFile),
|
||||
utils.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile),
|
||||
utils.WithOAuth(oauth.ClientID, oauth.ClientSecretFile, oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";")))
|
||||
utils.WithOAuth(oauth.ClientID, oauth.ClientSecretFile, oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ var (
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
blackHole = flag.Bool("notifier.blackhole", false, "Whether to blackhole alerting notifications. "+
|
||||
"Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). "+
|
||||
"`-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.")
|
||||
"-notifier.url, -notifier.config and -notifier.blackhole are mutually exclusive.")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
@@ -46,6 +46,8 @@ var (
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("notifier.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("notifier.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -notifier.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("notifier.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("notifier.oauth2.scopes", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'. "+
|
||||
@@ -141,6 +143,11 @@ func InitSecretFlags() {
|
||||
func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
var notifiers []Notifier
|
||||
for i, addr := range *addrs {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(i)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -notifier.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
authCfg := promauth.HTTPClientConfig{
|
||||
TLSConfig: &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(i),
|
||||
@@ -160,6 +167,7 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
ClientID: oauth2ClientID.GetOptionalArg(i),
|
||||
ClientSecret: promauth.NewSecret(oauth2ClientSecret.GetOptionalArg(i)),
|
||||
ClientSecretFile: oauth2ClientSecretFile.GetOptionalArg(i),
|
||||
EndpointParams: endpointParams,
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(i), ";"),
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(i),
|
||||
},
|
||||
|
||||
@@ -41,8 +41,10 @@ var (
|
||||
oauth2ClientID = flag.String("remoteRead.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteRead.url.")
|
||||
oauth2ClientSecret = flag.String("remoteRead.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteRead.url.")
|
||||
oauth2ClientSecretFile = flag.String("remoteRead.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteRead.url.")
|
||||
oauth2TokenURL = flag.String("remoteRead.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -remoteRead.url. ")
|
||||
oauth2Scopes = flag.String("remoteRead.oauth2.scopes", "", "Optional OAuth2 scopes to use for -remoteRead.url. Scopes must be delimited by ';'.")
|
||||
oauth2EndpointParams = flag.String("remoteRead.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -remoteRead.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("remoteRead.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -remoteRead.url. ")
|
||||
oauth2Scopes = flag.String("remoteRead.oauth2.scopes", "", "Optional OAuth2 scopes to use for -remoteRead.url. Scopes must be delimited by ';'.")
|
||||
)
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -63,10 +65,14 @@ func Init() (datasource.QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
|
||||
@@ -123,14 +123,12 @@ func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
||||
case <-c.doneCh:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
droppedBytes.Add(s.Size())
|
||||
return fmt.Errorf("client is closed")
|
||||
case c.input <- s:
|
||||
return nil
|
||||
default:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
droppedBytes.Add(s.Size())
|
||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
||||
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
||||
c.maxQueueSize)
|
||||
@@ -195,7 +193,6 @@ var (
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
|
||||
@@ -211,15 +208,10 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
defer prompbmarshal.ResetWriteRequest(wr)
|
||||
defer wr.Reset()
|
||||
defer bufferFlushDuration.UpdateDuration(time.Now())
|
||||
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b := snappy.Encode(nil, data)
|
||||
|
||||
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||
@@ -276,8 +268,11 @@ L:
|
||||
}
|
||||
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
rows := 0
|
||||
for _, ts := range wr.Timeseries {
|
||||
rows += len(ts.Samples)
|
||||
}
|
||||
droppedRows.Add(rows)
|
||||
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||
len(wr.Timeseries))
|
||||
}
|
||||
|
||||
@@ -140,7 +140,7 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
wr := &prompb.WriteRequest{}
|
||||
if err := wr.Unmarshal(b); err != nil {
|
||||
if err := wr.UnmarshalProtobuf(b); err != nil {
|
||||
rw.err(w, fmt.Errorf("unmarhsal err: %w", err))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -49,10 +49,7 @@ func (c *DebugClient) Push(s prompbmarshal.TimeSeries) error {
|
||||
c.wg.Add(1)
|
||||
defer c.wg.Done()
|
||||
wr := &prompbmarshal.WriteRequest{Timeseries: []prompbmarshal.TimeSeries{s}}
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal the given time series: %w", err)
|
||||
}
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
|
||||
return c.send(data)
|
||||
}
|
||||
|
||||
@@ -41,11 +41,13 @@ var (
|
||||
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
oauth2ClientID = flag.String("remoteWrite.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteWrite.url.")
|
||||
oauth2ClientSecret = flag.String("remoteWrite.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteWrite.url.")
|
||||
oauth2ClientSecretFile = flag.String("remoteWrite.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteWrite.url.")
|
||||
oauth2TokenURL = flag.String("remoteWrite.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -notifier.url.")
|
||||
oauth2Scopes = flag.String("remoteWrite.oauth2.scopes", "", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.")
|
||||
oauth2ClientID = flag.String("remoteWrite.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteWrite.url")
|
||||
oauth2ClientSecret = flag.String("remoteWrite.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flag.String("remoteWrite.oauth2.clientSecretFile", "", "Optional OAuth2 clientSecretFile to use for -remoteWrite.url")
|
||||
oauth2EndpointParams = flag.String("remoteWrite.oauth2.endpointParams", "", "Optional OAuth2 endpoint parameters to use for -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flag.String("remoteWrite.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -notifier.url.")
|
||||
oauth2Scopes = flag.String("remoteWrite.oauth2.scopes", "", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.")
|
||||
)
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -67,10 +69,14 @@ func Init(ctx context.Context) (*Client, error) {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := utils.AuthConfig(
|
||||
utils.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
utils.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes),
|
||||
utils.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
utils.WithHeaders(*headers))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure auth: %w", err)
|
||||
|
||||
@@ -30,6 +30,7 @@ type AlertingRule struct {
|
||||
Annotations map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
File string
|
||||
EvalInterval time.Duration
|
||||
Debug bool
|
||||
|
||||
@@ -47,7 +48,7 @@ type AlertingRule struct {
|
||||
}
|
||||
|
||||
type alertingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
errors *utils.Counter
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
@@ -67,6 +68,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
File: group.File,
|
||||
EvalInterval: group.Interval,
|
||||
Debug: cfg.Debug,
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
@@ -116,14 +118,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.Err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{%s}`, labels))
|
||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
@@ -242,11 +237,30 @@ type labelSet struct {
|
||||
origin map[string]string
|
||||
// processed labels includes origin labels
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, extra labels are preferred.
|
||||
// in case of key conflicts, origin labels are renamed with prefix `exported_` and extra labels are preferred.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5161
|
||||
// used as labels attached to notifier.Alert and ALERTS series written to remote storage.
|
||||
processed map[string]string
|
||||
}
|
||||
|
||||
// add adds a value v with key k to origin and processed label sets.
|
||||
// On k conflicts in processed set, the passed v is preferred.
|
||||
// On k conflicts in origin set, the original value is preferred and copied
|
||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||
func (ls *labelSet) add(k, v string) {
|
||||
ls.processed[k] = v
|
||||
ov, ok := ls.origin[k]
|
||||
if !ok {
|
||||
ls.origin[k] = v
|
||||
return
|
||||
}
|
||||
if ov != v {
|
||||
// copy value only if v and ov are different
|
||||
key := fmt.Sprintf("exported_%s", k)
|
||||
ls.processed[key] = ov
|
||||
}
|
||||
}
|
||||
|
||||
// toLabels converts labels from given Metric
|
||||
// to labelSet which contains original and processed labels.
|
||||
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
@@ -272,24 +286,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.processed[k] = v
|
||||
if _, ok := ls.origin[k]; !ok {
|
||||
ls.origin[k] = v
|
||||
}
|
||||
ls.add(k, v)
|
||||
}
|
||||
|
||||
// set additional labels to identify group and rule name
|
||||
if ar.Name != "" {
|
||||
ls.processed[alertNameLabel] = ar.Name
|
||||
if _, ok := ls.origin[alertNameLabel]; !ok {
|
||||
ls.origin[alertNameLabel] = ar.Name
|
||||
}
|
||||
ls.add(alertNameLabel, ar.Name)
|
||||
}
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.processed[alertGroupNameLabel] = ar.GroupName
|
||||
if _, ok := ls.origin[alertGroupNameLabel]; !ok {
|
||||
ls.origin[alertGroupNameLabel] = ar.GroupName
|
||||
}
|
||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
@@ -320,16 +324,6 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
|
||||
// if alert is instant, For: 0
|
||||
if ar.For == 0 {
|
||||
a.State = notifier.StateFiring
|
||||
for i := range s.Values {
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// if alert with For > 0
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
at := time.Unix(s.Timestamps[i], 0)
|
||||
@@ -350,6 +344,10 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
a.Start = at
|
||||
}
|
||||
prevT = at
|
||||
if ar.For == 0 {
|
||||
// rules with `for: 0` are always firing when they have Value
|
||||
a.State = notifier.StateFiring
|
||||
}
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
|
||||
// save alert's state on last iteration, so it can be used on the next execRange call
|
||||
@@ -383,6 +381,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
if curState.Err != nil {
|
||||
ar.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
@@ -416,8 +417,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
// duplicate may be caused by the removal of `__name__` label
|
||||
curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.Err
|
||||
}
|
||||
@@ -440,14 +440,13 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
a.KeepFiringSince = time.Time{}
|
||||
continue
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, start, qFn)
|
||||
a, err := ar.newAlert(m, ls, ts, qFn)
|
||||
if err != nil {
|
||||
curState.Err = fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = ts
|
||||
ar.alerts[h] = a
|
||||
ar.logDebugf(ts, a, "created in state PENDING")
|
||||
}
|
||||
@@ -473,7 +472,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
}
|
||||
// alerts with ar.KeepFiringFor>0 may remain FIRING
|
||||
// even if their expression isn't true anymore
|
||||
if ts.Sub(a.KeepFiringSince) > ar.KeepFiringFor {
|
||||
if ts.Sub(a.KeepFiringSince) >= ar.KeepFiringFor {
|
||||
a.State = notifier.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
||||
@@ -553,9 +552,9 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
}
|
||||
|
||||
const (
|
||||
// alertMetricName is the metric name for synthetic alert timeseries.
|
||||
// alertMetricName is the metric name for time series reflecting the alert state.
|
||||
alertMetricName = "ALERTS"
|
||||
// alertForStateMetricName is the metric name for 'for' state of alert.
|
||||
// alertForStateMetricName is the metric name for time series reflecting the moment of time when alert became active.
|
||||
alertForStateMetricName = "ALERTS_FOR_STATE"
|
||||
|
||||
// alertNameLabel is the label name indicating the name of an alert.
|
||||
@@ -570,12 +569,10 @@ const (
|
||||
|
||||
// alertToTimeSeries converts the given alert with the given timestamp to time series
|
||||
func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp int64) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, alertToTimeSeries(a, timestamp))
|
||||
if ar.For > 0 {
|
||||
tss = append(tss, alertForToTimeSeries(a, timestamp))
|
||||
return []prompbmarshal.TimeSeries{
|
||||
alertToTimeSeries(a, timestamp),
|
||||
alertForToTimeSeries(a, timestamp),
|
||||
}
|
||||
return tss
|
||||
}
|
||||
|
||||
func alertToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
|
||||
|
||||
@@ -3,6 +3,7 @@ package rule
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
@@ -26,20 +28,26 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("instant", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring},
|
||||
¬ifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second)},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
}),
|
||||
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
||||
[]int64{timestamp.UnixNano()},
|
||||
map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("instant extra labels", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}},
|
||||
¬ifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second),
|
||||
Labels: map[string]string{
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
@@ -47,19 +55,33 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}),
|
||||
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
||||
[]int64{timestamp.UnixNano()},
|
||||
map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("instant labels override", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
alertStateLabel: "foo",
|
||||
"__name__": "bar",
|
||||
}},
|
||||
¬ifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second),
|
||||
Labels: map[string]string{
|
||||
alertStateLabel: "foo",
|
||||
"__name__": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
}),
|
||||
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
||||
[]int64{timestamp.UnixNano()},
|
||||
map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
alertStateLabel: "foo",
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -306,14 +328,17 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
ts := time.Now()
|
||||
for i, step := range tc.steps {
|
||||
fq.Reset()
|
||||
fq.Add(step...)
|
||||
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
if _, err := tc.rule.exec(context.TODO(), ts, 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
// artificial delay between applying steps
|
||||
time.Sleep(defaultStep)
|
||||
|
||||
// shift the execution timestamp before the next iteration
|
||||
ts = ts.Add(defaultStep)
|
||||
|
||||
if _, ok := tc.expAlerts[i]; !ok {
|
||||
continue
|
||||
}
|
||||
@@ -365,7 +390,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{Values: []float64{1}, Timestamps: []int64{1}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
@@ -376,8 +401,9 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{
|
||||
Labels: map[string]string{"name": "foo"},
|
||||
State: notifier.StateFiring,
|
||||
Labels: map[string]string{"name": "foo"},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
},
|
||||
},
|
||||
nil,
|
||||
@@ -388,9 +414,9 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1e3, 2e3, 3e3}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1e3, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(2e3, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(3e3, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
@@ -458,6 +484,20 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRuleWithEvalInterval("firing=>inactive=>inactive=>firing=>firing", 0, time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1, 1}, Timestamps: []int64{1, 4, 5, 6}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
// ActiveAt is expected to remain the same while the rule continues to fire in each iteration
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(4, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(4, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(4, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
@@ -532,21 +572,25 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(100, 0),
|
||||
Labels: map[string]string{
|
||||
"source": "vm",
|
||||
}},
|
||||
//
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0),
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
"source": "vm",
|
||||
}},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
@@ -766,14 +810,16 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
ar.q = fq
|
||||
|
||||
// successful attempt
|
||||
// label `job` will be overridden by the rule's extra label; the original value will be preserved under "exported_job"
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err := ar.exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// label `job` will collide with rule extra label and will make both time series equal
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
// label `__name__` will be omitted, which produces duplicated results here
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo_1", "job", "bar"))
|
||||
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||
if !errors.Is(err, errDuplicate) {
|
||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||
@@ -897,20 +943,22 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "foo"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "override label", "instance": "bar"}): {
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "bar"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
"instance": "bar",
|
||||
alertNameLabel: "override label",
|
||||
"exported_alertname": "override",
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
@@ -939,14 +987,18 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"instance": "foo",
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
alertGroupNameLabel: "Testing",
|
||||
"exported_alertgroup": "originGroupname",
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
@@ -1078,12 +1130,72 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
metrics: &alertingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{alertname=%q}`, name)),
|
||||
},
|
||||
}
|
||||
return &rule
|
||||
}
|
||||
|
||||
func newTestAlertingRuleWithEvalInterval(name string, waitFor, evalInterval time.Duration) *AlertingRule {
|
||||
rule := newTestAlertingRule(name, waitFor)
|
||||
rule.EvalInterval = evalInterval
|
||||
return rule
|
||||
}
|
||||
|
||||
func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.Duration) *AlertingRule {
|
||||
rule := newTestAlertingRule(name, waitFor)
|
||||
rule.KeepFiringFor = keepFiringFor
|
||||
return rule
|
||||
}
|
||||
|
||||
func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
metric := datasource.Metric{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "instance", Value: "0.0.0.0:8800"},
|
||||
{Name: "group", Value: "vmalert"},
|
||||
{Name: "alertname", Value: "ConfigurationReloadFailure"},
|
||||
},
|
||||
Values: []float64{1},
|
||||
Timestamps: []int64{time.Now().UnixNano()},
|
||||
}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Labels: map[string]string{
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
},
|
||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||
Name: "AlertingRulesError",
|
||||
GroupName: "vmalert",
|
||||
}
|
||||
|
||||
expectedOriginLabels := map[string]string{
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
expectedProcessedLabels := map[string]string{
|
||||
"instance": "override",
|
||||
"exported_instance": "0.0.0.0:8800",
|
||||
"alertname": "AlertingRulesError",
|
||||
"exported_alertname": "ConfigurationReloadFailure",
|
||||
"group": "vmalert",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
ls, err := ar.toLabels(metric, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||
t.Errorf("origin labels mismatch, got: %v, want: %v", ls.origin, expectedOriginLabels)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.processed, expectedProcessedLabels) {
|
||||
t.Errorf("processed labels mismatch, got: %v, want: %v", ls.processed, expectedProcessedLabels)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,14 @@ import (
|
||||
// to evaluate configured Expression and
|
||||
// return TimeSeries as result.
|
||||
type RecordingRule struct {
|
||||
Type config.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
Labels map[string]string
|
||||
GroupID uint64
|
||||
Type config.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
Labels map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
File string
|
||||
|
||||
q datasource.Querier
|
||||
|
||||
@@ -34,7 +36,7 @@ type RecordingRule struct {
|
||||
}
|
||||
|
||||
type recordingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
errors *utils.Counter
|
||||
samples *utils.Gauge
|
||||
}
|
||||
|
||||
@@ -52,13 +54,15 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
// NewRecordingRule creates a new RecordingRule
|
||||
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||
rr := &RecordingRule{
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
Type: group.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
File: group.File,
|
||||
metrics: &recordingRuleMetrics{},
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
@@ -79,14 +83,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
if e.Err == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
rr.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_recording_rules_errors_total{%s}`, labels))
|
||||
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
@@ -138,6 +135,9 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
|
||||
defer func() {
|
||||
rr.state.add(curState)
|
||||
if curState.Err != nil {
|
||||
rr.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
if err != nil {
|
||||
@@ -194,6 +194,9 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
|
||||
labels["__name__"] = rr.Name
|
||||
// override existing labels with configured ones
|
||||
for k, v := range rr.Labels {
|
||||
if _, ok := labels[k]; ok && labels[k] != v {
|
||||
labels[fmt.Sprintf("exported_%s", k)] = labels[k]
|
||||
}
|
||||
labels[k] = v
|
||||
}
|
||||
return newTimeSeries(m.Values, m.Timestamps, labels)
|
||||
@@ -203,7 +206,7 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
|
||||
func (rr *RecordingRule) updateWith(r Rule) error {
|
||||
nr, ok := r.(*RecordingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
|
||||
return fmt.Errorf("BUG: attempt to update recording rule with wrong type %#v", r)
|
||||
}
|
||||
rr.Expr = nr.Expr
|
||||
rr.Labels = nr.Labels
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
@@ -60,7 +61,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
|
||||
},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
@@ -69,9 +70,10 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
"source": "test",
|
||||
}),
|
||||
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||
"__name__": "job:foo",
|
||||
"job": "bar",
|
||||
"source": "test",
|
||||
"__name__": "job:foo",
|
||||
"job": "bar",
|
||||
"source": "test",
|
||||
"exported_source": "origin",
|
||||
}),
|
||||
},
|
||||
},
|
||||
@@ -201,9 +203,15 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||
}
|
||||
rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
}}
|
||||
rule := &RecordingRule{Name: "job:foo",
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
},
|
||||
metrics: &recordingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
|
||||
},
|
||||
}
|
||||
var err error
|
||||
for _, testCase := range testCases {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
@@ -223,6 +231,9 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
"job": "test",
|
||||
},
|
||||
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||
metrics: &recordingRuleMetrics{
|
||||
errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
|
||||
},
|
||||
}
|
||||
fq := &datasource.FakeQuerier{}
|
||||
expErr := "connection reset by peer"
|
||||
@@ -244,10 +255,7 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
fq.Add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
|
||||
|
||||
_, err = rr.exec(context.TODO(), time.Now(), 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get err; got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), errDuplicate.Error()) {
|
||||
t.Fatalf("expected to get err %q; got %q insterad", errDuplicate, err)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,26 +43,26 @@ type ruleState struct {
|
||||
// StateEntry stores rule's execution states
|
||||
type StateEntry struct {
|
||||
// stores last moment of time rule.Exec was called
|
||||
Time time.Time
|
||||
Time time.Time `json:"time"`
|
||||
// stores the timestamp with which rule.Exec was called
|
||||
At time.Time
|
||||
At time.Time `json:"at"`
|
||||
// stores the duration of the last rule.Exec call
|
||||
Duration time.Duration
|
||||
Duration time.Duration `json:"duration"`
|
||||
// stores last error that happened in Exec func
|
||||
// resets on every successful Exec
|
||||
// may be used as Health ruleState
|
||||
Err error
|
||||
Err error `json:"error"`
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
Samples int
|
||||
Samples int `json:"samples"`
|
||||
// stores the number of time series fetched during
|
||||
// the last evaluation.
|
||||
// Is supported by VictoriaMetrics only, starting from v1.90.0
|
||||
// If seriesFetched == nil, then this attribute was missing in
|
||||
// datasource response (unsupported).
|
||||
SeriesFetched *int
|
||||
SeriesFetched *int `json:"series_fetched"`
|
||||
// stores the curl command reflecting the HTTP request used during rule.Exec
|
||||
Curl string
|
||||
Curl string `json:"curl"`
|
||||
}
|
||||
|
||||
// GetLastEntry returns latest stateEntry of rule
|
||||
|
||||
@@ -45,13 +45,14 @@ func WithBearer(token, tokenFile string) AuthConfigOptions {
|
||||
}
|
||||
|
||||
// WithOAuth returns AuthConfigOptions and sets OAuth params based on given params
|
||||
func WithOAuth(clientID, clientSecret, clientSecretFile, tokenURL, scopes string) AuthConfigOptions {
|
||||
func WithOAuth(clientID, clientSecret, clientSecretFile, tokenURL, scopes string, endpointParams map[string]string) AuthConfigOptions {
|
||||
return func(config *promauth.HTTPClientConfig) {
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
config.OAuth2 = &promauth.OAuth2Config{
|
||||
ClientID: clientID,
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: tokenURL,
|
||||
Scopes: strings.Split(scopes, ";"),
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 73 KiB |
|
Before Width: | Height: | Size: 151 KiB |
|
Before Width: | Height: | Size: 122 KiB |
|
Before Width: | Height: | Size: 80 KiB |
|
Before Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 109 KiB |
|
Before Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 41 KiB |
@@ -12,11 +12,14 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
)
|
||||
|
||||
var reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
|
||||
var (
|
||||
apiLinks = [][2]string{
|
||||
// api links are relative since they can be used by external clients,
|
||||
@@ -132,7 +135,28 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||
rule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
rwu := apiRuleWithUpdates{
|
||||
apiRule: rule,
|
||||
StateUpdates: rule.Updates,
|
||||
}
|
||||
data, err := json.Marshal(rwu)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey.Get(), "reloadAuthKey") {
|
||||
return true
|
||||
}
|
||||
logger.Infof("api config reload was called, sending sighup")
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
@@ -143,6 +143,28 @@ func TestHandler(t *testing.T) {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||
expRule := ruleToAPI(ar)
|
||||
gotRule := apiRule{}
|
||||
getResp(ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
||||
|
||||
if expRule.ID != gotRule.ID {
|
||||
t.Errorf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||
}
|
||||
|
||||
gotRule = apiRule{}
|
||||
getResp(ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
||||
|
||||
if expRule.ID != gotRule.ID {
|
||||
t.Errorf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||
}
|
||||
|
||||
gotRuleWithUpdates := apiRuleWithUpdates{}
|
||||
getResp(ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
||||
if gotRuleWithUpdates.StateUpdates == nil || len(gotRuleWithUpdates.StateUpdates) < 1 {
|
||||
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestEmptyResponse(t *testing.T) {
|
||||
|
||||
@@ -151,6 +151,10 @@ type apiRule struct {
|
||||
ID string `json:"id"`
|
||||
// GroupID is a unique Group's ID
|
||||
GroupID string `json:"group_id"`
|
||||
// GroupName is the name of the Group the rule belongs to
|
||||
GroupName string `json:"group_name"`
|
||||
// File is file name where rule is defined
|
||||
File string `json:"file"`
|
||||
// Debug shows whether debug mode is enabled
|
||||
Debug bool `json:"debug"`
|
||||
|
||||
@@ -160,6 +164,19 @@ type apiRule struct {
|
||||
Updates []rule.StateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
|
||||
type apiRuleWithUpdates struct {
|
||||
apiRule
|
||||
// StateUpdates contains the ordered list of recorded ruleStateEntry objects
|
||||
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
|
||||
}
|
||||
|
||||
// APILink returns a link to the rule's JSON representation.
|
||||
func (ar apiRule) APILink() string {
|
||||
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
||||
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
func (ar apiRule) WebLink() string {
|
||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||
@@ -227,8 +244,10 @@ func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
GroupName: ar.GroupName,
|
||||
File: ar.File,
|
||||
}
|
||||
if lastState.Err != nil {
|
||||
r.LastError = lastState.Err.Error()
|
||||
|
||||
@@ -1,576 +1,3 @@
|
||||
# vmauth
|
||||
See vmauth docs [here](https://docs.victoriametrics.com/vmauth.html).
|
||||
|
||||
`vmauth` is a simple auth proxy, router and [load balancer](#load-balancing) for [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It reads auth credentials from `Authorization` http header ([Basic Auth](https://en.wikipedia.org/wiki/Basic_access_authentication), `Bearer token` and [InfluxDB authorization](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1897) are supported),
|
||||
matches them against configs pointed by [-auth.config](#auth-config) command-line flag and proxies incoming HTTP requests to the configured per-user `url_prefix` on successful match.
|
||||
The `-auth.config` can point to either local file or to http url.
|
||||
|
||||
## Quick start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), unpack it
|
||||
and pass the following flag to `vmauth` binary in order to start authorizing and routing requests:
|
||||
|
||||
```console
|
||||
/path/to/vmauth -auth.config=/path/to/auth/config.yml
|
||||
```
|
||||
|
||||
After that `vmauth` starts accepting HTTP requests on port `8427` and routing them according to the provided [-auth.config](#auth-config).
|
||||
The port can be modified via `-httpListenAddr` command-line flag.
|
||||
|
||||
The auth config can be reloaded via the following ways:
|
||||
|
||||
- By passing `SIGHUP` signal to `vmauth`.
|
||||
- By querying `/-/reload` http endpoint. This endpoint can be protected with `-reloadAuthKey` command-line flag. See [security docs](#security) for more details.
|
||||
- By setting `-configCheckInterval` command-line flag to the desired interval between config re-reads. For example, `-configCheckInterval=5s` will re-read the config
|
||||
and apply new changes every 5 seconds.
|
||||
|
||||
Docker images for `vmauth` are available [here](https://hub.docker.com/r/victoriametrics/vmauth/tags).
|
||||
See how `vmauth` is used in [docker-compose env](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/README.md#victoriametrics-cluster).
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
Feel free to [contact us](mailto:info@victoriametrics.com) if you need a customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML,
|
||||
accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.com/vmgateway.html).
|
||||
|
||||
## Dropping request path prefix
|
||||
|
||||
By default `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
|
||||
Sometimes it is needed to drop path prefix before routing the request to the backend. This can be done by specifying the number of `/`-delimited
|
||||
prefix parts to drop from the request path via `drop_src_path_prefix_parts` option at `url_map` level or at `user` level.
|
||||
|
||||
For example, if you need to serve requests to [vmalert](https://docs.victoriametrics.com/vmalert.html) at `/vmalert/` path prefix,
|
||||
while serving requests to [vmagent](https://docs.victoriametrics.com/vmagent.html) at `/vmagent/` path prefix for a particular user,
|
||||
then the following [-auth.config](#auth-config) can be used:
|
||||
|
||||
```yml
|
||||
users:
|
||||
- username: foo
|
||||
url_map:
|
||||
|
||||
# proxy all the requests, which start with `/vmagent/`, to vmagent backend
|
||||
- src_paths:
|
||||
- "/vmagent/.+"
|
||||
|
||||
# drop /vmagent/ path prefix from the original request before proxying it to url_prefix.
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmagent-backend:8429/"
|
||||
|
||||
# proxy all the requests, which start with `/vmalert`, to vmalert backend
|
||||
- src_paths:
|
||||
- "/vmalert/.+"
|
||||
|
||||
# drop /vmalert/ path prefix from the original request before proxying it to url_prefix.
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmalert-backend:8880/"
|
||||
```
|
||||
|
||||
## Load balancing
|
||||
|
||||
Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
|
||||
In the latter case `vmauth` balances load among the configured urls in least-loaded round-robin manner.
|
||||
|
||||
If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls.
|
||||
|
||||
It is possible to configure automatic retry of requests if the backend responds with status code from optional `retry_status_codes` list.
|
||||
|
||||
Load balancing feature can be used in the following cases:
|
||||
|
||||
- Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests
|
||||
or requests with 500 and 502 response status codes:
|
||||
|
||||
```yml
|
||||
unauthorized_user:
|
||||
url_prefix:
|
||||
- http://vmselect1:8481/
|
||||
- http://vmselect2:8481/
|
||||
- http://vmselect3:8481/
|
||||
retry_status_codes: [500, 502]
|
||||
```
|
||||
|
||||
- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries
|
||||
among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable.
|
||||
`vmauth` adds `deny_partial_response=1` query arg to all the queries in order to guarantee to get full response from every AZ.
|
||||
See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details.
|
||||
|
||||
```yml
|
||||
unauthorized_user:
|
||||
url_prefix:
|
||||
- https://vmselect-az1/?deny_partial_response=1
|
||||
- https://vmselect-az2/?deny_partial_response=1
|
||||
- https://vmselect-az3/?deny_partial_response=1
|
||||
retry_status_codes: [500, 502, 503]
|
||||
```
|
||||
|
||||
Load balancing can also be configured independently per each user and per each `url_map` entry.
|
||||
See [auth config docs](#auth-config) for more details.
|
||||
|
||||
## Concurrency limiting
|
||||
|
||||
`vmauth` limits the number of concurrent requests it can proxy according to the following command-line flags:
|
||||
|
||||
- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
|
||||
- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
|
||||
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user
|
||||
with the `max_concurrent_requests` option - see [auth config example](#auth-config).
|
||||
|
||||
`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.
|
||||
|
||||
The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
|
||||
|
||||
- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
|
||||
It is set via `-maxConcurrentRequests` command-line flag.
|
||||
- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
|
||||
- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the global concurrency limit has been reached.
|
||||
- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_limit_reached_total{username="foo"}` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the concurrency limit has been reached for the given `username`.
|
||||
- `vmauth_unauthorized_user_concurrent_requests_capacity` - the limit on the number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
- `vmauth_unauthorized_user_concurrent_requests_current` - the current number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
- `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
|
||||
## Backend TLS setup
|
||||
|
||||
By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
|
||||
in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:
|
||||
|
||||
- `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
|
||||
via `tls_insecure_skip_verify` option. For example:
|
||||
|
||||
```yml
|
||||
- username: "foo"
|
||||
url_prefix: "https://localhost"
|
||||
tls_insecure_skip_verify: true
|
||||
```
|
||||
|
||||
- `-backend.tlsCAFile` allows specifying the path to TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
|
||||
The `-backend.tlsCAFile` may point either to local file or to `http` / `https` url.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
|
||||
via `tls_ca_file` option. For example:
|
||||
|
||||
```yml
|
||||
- username: "foo"
|
||||
url_prefix: "https://localhost"
|
||||
tls_ca_file: "/path/to/tls/root/ca"
|
||||
```
|
||||
|
||||
## IP filters
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/enterprise.html) of `vmauth` can be configured to allow / deny incoming requests via global and per-user IP filters.
|
||||
|
||||
For example, the following config allows requests to `vmauth` from `10.0.0.0/24` network and from `1.2.3.4` IP address, while denying requests from `10.0.0.42` IP address:
|
||||
|
||||
```yml
|
||||
users:
|
||||
# User configs here
|
||||
|
||||
ip_filters:
|
||||
allow_list:
|
||||
- 10.0.0.0/24
|
||||
- 1.2.3.4
|
||||
deny_list: [10.0.0.42]
|
||||
```
|
||||
|
||||
The following config allows requests for the user 'foobar' only from the IP `127.0.0.1`:
|
||||
|
||||
```yml
|
||||
users:
|
||||
- username: "foobar"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428"
|
||||
ip_filters:
|
||||
allow_list: [127.0.0.1]
|
||||
```
|
||||
|
||||
See config example of using IP filters [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/example_config_ent.yml).
|
||||
|
||||
## Auth config
|
||||
|
||||
`-auth.config` is represented in the following simple `yml` format:
|
||||
|
||||
```yml
|
||||
# Arbitrary number of usernames may be put here.
|
||||
# It is possible to set multiple identical usernames with different passwords.
|
||||
# Such usernames can be differentiated by `name` option.
|
||||
|
||||
users:
|
||||
# Requests with the 'Authorization: Bearer XXXX' and 'Authorization: Token XXXX'
|
||||
# header are proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
# Requests with the Basic Auth username=XXXX are proxied to http://localhost:8428 as well.
|
||||
- bearer_token: "XXXX"
|
||||
url_prefix: "http://localhost:8428"
|
||||
|
||||
# Requests with the 'Authorization: Bearer YYY' header are proxied to http://localhost:8428 ,
|
||||
# The `X-Scope-OrgID: foobar` http header is added to every proxied request.
|
||||
# The `X-Server-Hostname` http header is removed from the proxied response.
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
- bearer_token: "YYY"
|
||||
url_prefix: "http://localhost:8428"
|
||||
# extra headers to add to the request or remove from the request (if header value is empty)
|
||||
headers:
|
||||
- "X-Scope-OrgID: foobar"
|
||||
# extra headers to add to the response or remove from the response (if header value is empty)
|
||||
response_headers:
|
||||
- "X-Server-Hostname:" # empty value means the header will be removed from the response
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
#
|
||||
# The given user can send maximum 10 concurrent requests according to the provided max_concurrent_requests.
|
||||
# Excess concurrent requests are rejected with 429 HTTP status code.
|
||||
# See also -maxConcurrentPerUserRequests and -maxConcurrentRequests command-line flags.
|
||||
- username: "local-single-node"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428"
|
||||
max_concurrent_requests: 10
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
|
||||
- username: "local-single-node2"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to https://localhost .
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
|
||||
# TLS verification is skipped for https://localhost.
|
||||
- username: "local-single-node-with-tls"
|
||||
password: "***"
|
||||
url_prefix: "https://localhost"
|
||||
tls_insecure_skip_verify: true
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
|
||||
# - http://vmselect1:8481/select/123/prometheus/api/v1/query
|
||||
# - http://vmselect2:8481/select/123/prometheus/api/v1/query
|
||||
- username: "cluster-select-account-123"
|
||||
password: "***"
|
||||
url_prefix:
|
||||
- "http://vmselect1:8481/select/123/prometheus"
|
||||
- "http://vmselect2:8481/select/123/prometheus"
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/write is proxied to the following urls in a round-robin manner:
|
||||
# - http://vminsert1:8480/insert/42/prometheus/api/v1/write
|
||||
# - http://vminsert2:8480/insert/42/prometheus/api/v1/write
|
||||
- username: "cluster-insert-account-42"
|
||||
password: "***"
|
||||
url_prefix:
|
||||
- "http://vminsert1:8480/insert/42/prometheus"
|
||||
- "http://vminsert2:8480/insert/42/prometheus"
|
||||
|
||||
# A single user for querying and inserting data:
|
||||
#
|
||||
# - Requests to http://vmauth:8427/api/v1/query, http://vmauth:8427/api/v1/query_range
|
||||
# and http://vmauth:8427/api/v1/label/<label_name>/values are proxied to the following urls in a round-robin manner:
|
||||
# - http://vmselect1:8481/select/42/prometheus
|
||||
# - http://vmselect2:8481/select/42/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8481/select/42/prometheus/api/v1/query
|
||||
# or to http://vmselect2:8481/select/42/prometheus/api/v1/query .
|
||||
# Requests are re-tried at other url_prefix backends if response status codes match 500 or 502.
|
||||
#
|
||||
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write .
|
||||
# The "X-Scope-OrgID: abc" http header is added to these requests.
|
||||
# The "X-Server-Hostname" http header is removed from the proxied response.
|
||||
#
|
||||
# Requests which do not match `src_paths` from the `url_map` are proxied to the urls from `default_url`
|
||||
# in a round-robin manner. The original request path is passed in `request_path` query arg.
|
||||
# For example, requests to http://vmauth:8427/non/existing/path are proxied:
|
||||
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
#
|
||||
# Regular expressions are allowed in `src_paths` entries.
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/api/v1/query"
|
||||
- "/api/v1/query_range"
|
||||
- "/api/v1/label/[^/]+/values"
|
||||
url_prefix:
|
||||
- "http://vmselect1:8481/select/42/prometheus"
|
||||
- "http://vmselect2:8481/select/42/prometheus"
|
||||
retry_status_codes: [500, 502]
|
||||
- src_paths: ["/api/v1/write"]
|
||||
url_prefix: "http://vminsert:8480/insert/42/prometheus"
|
||||
headers:
|
||||
- "X-Scope-OrgID: abc"
|
||||
response_headers:
|
||||
- "X-Server-Hostname:" # empty value means the header will be removed from the response
|
||||
ip_filters:
|
||||
deny_list: [127.0.0.1]
|
||||
default_url:
|
||||
- "http://default1:8888/unsupported_url_handler"
|
||||
- "http://default2:8888/unsupported_url_handler"
|
||||
|
||||
# Requests without Authorization header are routed according to `unauthorized_user` section.
|
||||
# Requests are routed in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the routed requests.
|
||||
# The requests are re-tried if url_prefix backends send 500 or 503 response status codes.
|
||||
# Note that the unauthorized_user section takes precedence when processing a route without credentials,
|
||||
# even if such a route also exists in the users section (see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236).
|
||||
unauthorized_user:
|
||||
url_prefix:
|
||||
- http://vmselect-az1/?deny_partial_response=1
|
||||
- http://vmselect-az2/?deny_partial_response=1
|
||||
retry_status_codes: [503, 500]
|
||||
|
||||
ip_filters:
|
||||
allow_list: ["1.2.3.0/24", "127.0.0.1"]
|
||||
deny_list:
|
||||
- 10.1.0.1
|
||||
```
|
||||
|
||||
The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
This may be useful for passing secrets to the config.
|
||||
|
||||
Please note, vmauth doesn't follow redirects. If the destination redirects the request to a new location, make sure this
|
||||
location is supported in vmauth `url_map` config.
|
||||
|
||||
## Security
|
||||
|
||||
It is expected that all the backend services protected by `vmauth` are located in an isolated private network, so they can be accessed by external users only via `vmauth`.
|
||||
|
||||
Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable https. This can be done by passing the following `-tls*` command-line flags to `vmauth`:
|
||||
|
||||
```console
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
```
|
||||
|
||||
Alternatively, [https termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.
|
||||
|
||||
It is recommended to protect the following endpoints with authKeys:
|
||||
* `/-/reload` with `-reloadAuthKey` command-line flag, so external users couldn't trigger config reload.
|
||||
* `/flags` with `-flagsAuthKey` command-line flag, so unauthorized users couldn't get application command-line flags.
|
||||
* `/metrics` with `-metricsAuthKey` command-line flag, so unauthorized users couldn't get access to [vmauth metrics](#monitoring).
|
||||
* `/debug/pprof` with `-pprofAuthKey` command-line flag, so unauthorized users couldn't get access to [profiling information](#profiling).
|
||||
|
||||
`vmauth` also supports the ability to restrict access by IP - see [these docs](#ip-filters). See also [concurrency limiting docs](#concurrency-limiting).
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended to set up regular scraping of this page
|
||||
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
`vmauth` exports `vmauth_user_requests_total` [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric
|
||||
and `vmauth_user_request_duration_seconds_*` [summary](https://docs.victoriametrics.com/keyConcepts.html#summary) metric
|
||||
with `username` label. The `username` label value equals the `username` field value set in the `-auth.config` file.
|
||||
It is possible to override or hide the value in the label by specifying `name` field.
|
||||
For example, the following config will result in `vmauth_user_requests_total{username="foobar"}`
|
||||
instead of `vmauth_user_requests_total{username="secret_user"}`:
|
||||
|
||||
```yml
|
||||
users:
|
||||
- username: "secret_user"
|
||||
name: "foobar"
|
||||
# other config options here
|
||||
```
|
||||
|
||||
For unauthorized users `vmauth` exports `vmauth_unauthorized_user_requests_total`
|
||||
[counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric and
|
||||
`vmauth_unauthorized_user_request_duration_seconds_*` [summary](https://docs.victoriametrics.com/keyConcepts.html#summary)
|
||||
metric without label (if `unauthorized_user` section of config is used).
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - `vmauth` is located in `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
|
||||
1. Run `make vmauth` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmauth` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
1. Run `make vmauth-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmauth-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```console
|
||||
ROOT_IMAGE=scratch make package-vmauth
|
||||
```
|
||||
|
||||
## Profiling
|
||||
|
||||
`vmauth` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
* Memory profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```console
|
||||
curl http://0.0.0.0:8427/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
* CPU profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```console
|
||||
curl http://0.0.0.0:8427/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
It is safe to share the collected profiles from a security point of view, since they do not contain sensitive information.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
Pass `-help` command-line arg to `vmauth` in order to see all the configuration options:
|
||||
|
||||
```console
|
||||
./vmauth -help
|
||||
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
|
||||
-auth.config string
|
||||
Path to auth config. It can point either to local file or to http url. See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config
|
||||
-configCheckInterval duration
|
||||
interval for config file re-read. Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
-failTimeout duration
|
||||
Sets a delay period for load balancing to skip a malfunctioning backend (default 3s)
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8427")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-logInvalidAuthTokens
|
||||
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentPerUserRequests int
|
||||
The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option in per-user config (default 300)
|
||||
-maxConcurrentRequests int
|
||||
The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
|
||||
-maxIdleConnsPerBackend int
|
||||
The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
|
||||
-maxRequestBodySizeToRetry size
|
||||
The maximum request body size, which can be cached and re-tried at other backends. Bigger values may require more memory
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-reloadAuthKey string
|
||||
Auth key for /-/reload http endpoint. It must be passed as authKey=...
|
||||
-responseTimeout duration
|
||||
The timeout for receiving a response from backend (default 5m0s)
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
vmauth docs can be edited at [docs/vmauth.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmauth.md).
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -16,11 +17,13 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
)
|
||||
@@ -30,6 +33,10 @@ var (
|
||||
"See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config")
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "interval for config file re-read. "+
|
||||
"Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.")
|
||||
defaultRetryStatusCodes = flagutil.NewArrayInt("retryStatusCodes", 0, "Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. "+
|
||||
"See https://docs.victoriametrics.com/vmauth.html#load-balancing for details")
|
||||
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
||||
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details")
|
||||
)
|
||||
|
||||
// AuthConfig represents auth config.
|
||||
@@ -50,16 +57,20 @@ type UserInfo struct {
|
||||
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
|
||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
TLSInsecureSkipVerify *bool `yaml:"tls_insecure_skip_verify,omitempty"`
|
||||
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
||||
|
||||
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
|
||||
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitReached *metrics.Counter
|
||||
|
||||
httpTransport *http.Transport
|
||||
|
||||
requests *metrics.Counter
|
||||
backendErrors *metrics.Counter
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
|
||||
@@ -120,23 +131,61 @@ func (h *Header) MarshalYAML() (interface{}, error) {
|
||||
|
||||
// URLMap is a mapping from source paths to target urls.
|
||||
type URLMap struct {
|
||||
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
// SrcHosts is the list of regular expressions, which match the request hostname.
|
||||
SrcHosts []*Regex `yaml:"src_hosts,omitempty"`
|
||||
|
||||
// SrcPaths is the list of regular expressions, which match the request path.
|
||||
SrcPaths []*Regex `yaml:"src_paths,omitempty"`
|
||||
|
||||
// URLPrefix contains backend url prefixes for the proxied request url.
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
|
||||
// HeadersConf is the config for augmenting request and response headers.
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
|
||||
// RetryStatusCodes is the list of response status codes used for retrying requests.
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
|
||||
// LoadBalancingPolicy is load balancing policy among URLPrefix backends.
|
||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||
|
||||
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
}
|
||||
|
||||
// SrcPath represents an src path
|
||||
type SrcPath struct {
|
||||
// Regex represents a regex
|
||||
type Regex struct {
|
||||
sOriginal string
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
// URLPrefix represents passed `url_prefix`
|
||||
type URLPrefix struct {
|
||||
n uint32
|
||||
n uint32
|
||||
|
||||
// the list of backend urls
|
||||
bus []*backendURL
|
||||
|
||||
// requests are re-tried on other backend urls for these http response status codes
|
||||
retryStatusCodes []int
|
||||
|
||||
// load balancing policy used
|
||||
loadBalancingPolicy string
|
||||
|
||||
// how many request path prefix parts to drop before routing the request to backendURL.
|
||||
dropSrcPathPrefixParts int
|
||||
}
|
||||
|
||||
func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
||||
switch loadBalancingPolicy {
|
||||
case "", // empty string is equivalent to least_loaded
|
||||
"least_loaded",
|
||||
"first_available":
|
||||
up.loadBalancingPolicy = loadBalancingPolicy
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unexpected load_balancing_policy: %q; want least_loaded or first_available", loadBalancingPolicy)
|
||||
}
|
||||
}
|
||||
|
||||
type backendURL struct {
|
||||
@@ -155,6 +204,10 @@ func (bu *backendURL) setBroken() {
|
||||
atomic.StoreUint64(&bu.brokenDeadline, deadline)
|
||||
}
|
||||
|
||||
func (bu *backendURL) get() {
|
||||
atomic.AddInt32(&bu.concurrentRequests, 1)
|
||||
}
|
||||
|
||||
func (bu *backendURL) put() {
|
||||
atomic.AddInt32(&bu.concurrentRequests, -1)
|
||||
}
|
||||
@@ -163,6 +216,40 @@ func (up *URLPrefix) getBackendsCount() int {
|
||||
return len(up.bus)
|
||||
}
|
||||
|
||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||
if up.loadBalancingPolicy == "first_available" {
|
||||
return up.getFirstAvailableBackendURL()
|
||||
}
|
||||
return up.getLeastLoadedBackendURL()
|
||||
}
|
||||
|
||||
// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func (up *URLPrefix) getFirstAvailableBackendURL() *backendURL {
|
||||
bus := up.bus
|
||||
|
||||
bu := bus[0]
|
||||
if !bu.isBroken() {
|
||||
// Fast path - send the request to the first url.
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
// Slow path - the first url is temporarily unavailabel. Fall back to the remaining urls.
|
||||
for i := 1; i < len(bus); i++ {
|
||||
if !bus[i].isBroken() {
|
||||
bu = bus[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
@@ -171,7 +258,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
||||
if len(bus) == 1 {
|
||||
// Fast path - return the only backend url.
|
||||
bu := bus[0]
|
||||
atomic.AddInt32(&bu.concurrentRequests, 1)
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
@@ -184,8 +271,10 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if atomic.CompareAndSwapInt32(&bu.concurrentRequests, 0, 1) {
|
||||
if atomic.LoadInt32(&bu.concurrentRequests) == 0 {
|
||||
// Fast path - return the backend with zero concurrently executed requests.
|
||||
// Do not use atomic.CompareAndSwapInt32(), since it is much slower on systems with many CPU cores.
|
||||
atomic.AddInt32(&bu.concurrentRequests, 1)
|
||||
return bu
|
||||
}
|
||||
}
|
||||
@@ -202,7 +291,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
||||
minRequests = n
|
||||
}
|
||||
}
|
||||
atomic.AddInt32(&buMin.concurrentRequests, 1)
|
||||
buMin.get()
|
||||
return buMin
|
||||
}
|
||||
|
||||
@@ -212,6 +301,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
|
||||
if err := f(&v); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var urls []string
|
||||
switch x := v.(type) {
|
||||
case string:
|
||||
@@ -232,6 +322,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
|
||||
default:
|
||||
return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v)
|
||||
}
|
||||
|
||||
bus := make([]*backendURL, len(urls))
|
||||
for i, u := range urls {
|
||||
pu, err := url.Parse(u)
|
||||
@@ -266,8 +357,8 @@ func (up *URLPrefix) MarshalYAML() (interface{}, error) {
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
func (sp *SrcPath) match(s string) bool {
|
||||
prefix, ok := sp.re.LiteralPrefix()
|
||||
func (r *Regex) match(s string) bool {
|
||||
prefix, ok := r.re.LiteralPrefix()
|
||||
if ok {
|
||||
// Fast path - literal match
|
||||
return s == prefix
|
||||
@@ -275,11 +366,11 @@ func (sp *SrcPath) match(s string) bool {
|
||||
if !strings.HasPrefix(s, prefix) {
|
||||
return false
|
||||
}
|
||||
return sp.re.MatchString(s)
|
||||
return r.re.MatchString(s)
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements yaml.Unmarshaler
|
||||
func (sp *SrcPath) UnmarshalYAML(f func(interface{}) error) error {
|
||||
func (r *Regex) UnmarshalYAML(f func(interface{}) error) error {
|
||||
var s string
|
||||
if err := f(&s); err != nil {
|
||||
return err
|
||||
@@ -289,20 +380,20 @@ func (sp *SrcPath) UnmarshalYAML(f func(interface{}) error) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot build regexp from %q: %w", s, err)
|
||||
}
|
||||
sp.sOriginal = s
|
||||
sp.re = re
|
||||
r.sOriginal = s
|
||||
r.re = re
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML implements yaml.Marshaler.
|
||||
func (sp *SrcPath) MarshalYAML() (interface{}, error) {
|
||||
return sp.sOriginal, nil
|
||||
func (r *Regex) MarshalYAML() (interface{}, error) {
|
||||
return r.sOriginal, nil
|
||||
}
|
||||
|
||||
var (
|
||||
configReloads = metrics.NewCounter(`vmauth_config_last_reload_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmauth_config_last_reload_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmauth_config_last_reload_successful`)
|
||||
configSuccess = metrics.NewGauge(`vmauth_config_last_reload_successful`, nil)
|
||||
configTimestamp = metrics.NewCounter(`vmauth_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
@@ -378,17 +469,19 @@ func authConfigReloader(sighupCh <-chan os.Signal) {
|
||||
// authConfigData needs to be updated each time authConfig is updated.
|
||||
var authConfigData atomic.Pointer[[]byte]
|
||||
|
||||
var authConfig atomic.Pointer[AuthConfig]
|
||||
var authUsers atomic.Pointer[map[string]*UserInfo]
|
||||
var authConfigWG sync.WaitGroup
|
||||
var stopCh chan struct{}
|
||||
var (
|
||||
authConfig atomic.Pointer[AuthConfig]
|
||||
authUsers atomic.Pointer[map[string]*UserInfo]
|
||||
authConfigWG sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
)
|
||||
|
||||
// loadAuthConfig loads and applies the config from *authConfigPath.
|
||||
// It returns bool value to identify if new config was applied.
|
||||
// The config can be not applied if there is a parsing error
|
||||
// or if there are no changes to the current authConfig.
|
||||
func loadAuthConfig() (bool, error) {
|
||||
data, err := fs.ReadFileOrHTTP(*authConfigPath)
|
||||
data, err := fscore.ReadFileOrHTTP(*authConfigPath)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to read -auth.config=%q: %w", *authConfigPath, err)
|
||||
}
|
||||
@@ -440,16 +533,26 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
if ui.Name != "" {
|
||||
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`)
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total`)
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`, func() float64 {
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`, func() float64 {
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
|
||||
@@ -480,51 +583,32 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
if byAuthToken[at2] != nil {
|
||||
return nil, fmt.Errorf("duplicate auth token found for bearer_token=%q, username=%q: %q", ui.BearerToken, ui.Username, at2)
|
||||
}
|
||||
if ui.URLPrefix != nil {
|
||||
if err := ui.URLPrefix.sanitize(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
if err := ui.DefaultURL.sanitize(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ui.BearerToken != "" && ui.Password != "" {
|
||||
return nil, fmt.Errorf("password shouldn't be set for bearer_token %q", ui.BearerToken)
|
||||
}
|
||||
for _, e := range ui.URLMaps {
|
||||
if len(e.SrcPaths) == 0 {
|
||||
return nil, fmt.Errorf("missing `src_paths` in `url_map`")
|
||||
}
|
||||
if e.URLPrefix == nil {
|
||||
return nil, fmt.Errorf("missing `url_prefix` in `url_map`")
|
||||
}
|
||||
if err := e.URLPrefix.sanitize(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
|
||||
return nil, fmt.Errorf("missing `url_prefix`")
|
||||
}
|
||||
name := ui.name()
|
||||
if ui.BearerToken != "" {
|
||||
if ui.Password != "" {
|
||||
return nil, fmt.Errorf("password shouldn't be set for bearer_token %q", ui.BearerToken)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, name))
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(fmt.Sprintf(`vmauth_user_request_duration_seconds{username=%q}`, name))
|
||||
}
|
||||
if ui.Username != "" {
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, name))
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(fmt.Sprintf(`vmauth_user_request_duration_seconds{username=%q}`, name))
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_concurrent_requests_limit_reached_total{username=%q}`, name))
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_capacity{username=%q}`, name), func() float64 {
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
|
||||
@@ -537,6 +621,91 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
return byAuthToken, nil
|
||||
}
|
||||
|
||||
var labelNameRegexp = regexp.MustCompile("^[a-zA-Z_:.][a-zA-Z0-9_:.]*$")
|
||||
|
||||
func (ui *UserInfo) getMetricLabels() (string, error) {
|
||||
name := ui.name()
|
||||
if len(name) == 0 && len(ui.MetricLabels) == 0 {
|
||||
// fast path
|
||||
return "", nil
|
||||
}
|
||||
labels := make([]string, 0, len(ui.MetricLabels)+1)
|
||||
if len(name) > 0 {
|
||||
labels = append(labels, fmt.Sprintf(`username=%q`, name))
|
||||
}
|
||||
for k, v := range ui.MetricLabels {
|
||||
if !labelNameRegexp.MatchString(k) {
|
||||
return "", fmt.Errorf("incorrect label name=%q, it must match regex=%q for user=%q", k, labelNameRegexp, name)
|
||||
}
|
||||
labels = append(labels, fmt.Sprintf(`%s=%q`, k, v))
|
||||
}
|
||||
sort.Strings(labels)
|
||||
labelsStr := "{" + strings.Join(labels, ",") + "}"
|
||||
return labelsStr, nil
|
||||
}
|
||||
|
||||
func (ui *UserInfo) initURLs() error {
|
||||
retryStatusCodes := defaultRetryStatusCodes.Values()
|
||||
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
||||
dropSrcPathPrefixParts := 0
|
||||
if ui.URLPrefix != nil {
|
||||
if err := ui.URLPrefix.sanitize(); err != nil {
|
||||
return err
|
||||
}
|
||||
if ui.RetryStatusCodes != nil {
|
||||
retryStatusCodes = ui.RetryStatusCodes
|
||||
}
|
||||
if ui.LoadBalancingPolicy != "" {
|
||||
loadBalancingPolicy = ui.LoadBalancingPolicy
|
||||
}
|
||||
if ui.DropSrcPathPrefixParts != nil {
|
||||
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
||||
}
|
||||
ui.URLPrefix.retryStatusCodes = retryStatusCodes
|
||||
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
||||
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
if err := ui.DefaultURL.sanitize(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, e := range ui.URLMaps {
|
||||
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 {
|
||||
return fmt.Errorf("missing `src_paths` and `src_hosts` in `url_map`")
|
||||
}
|
||||
if e.URLPrefix == nil {
|
||||
return fmt.Errorf("missing `url_prefix` in `url_map`")
|
||||
}
|
||||
if err := e.URLPrefix.sanitize(); err != nil {
|
||||
return err
|
||||
}
|
||||
rscs := retryStatusCodes
|
||||
lbp := loadBalancingPolicy
|
||||
dsp := dropSrcPathPrefixParts
|
||||
if e.RetryStatusCodes != nil {
|
||||
rscs = e.RetryStatusCodes
|
||||
}
|
||||
if e.LoadBalancingPolicy != "" {
|
||||
lbp = e.LoadBalancingPolicy
|
||||
}
|
||||
if e.DropSrcPathPrefixParts != nil {
|
||||
dsp = *e.DropSrcPathPrefixParts
|
||||
}
|
||||
e.URLPrefix.retryStatusCodes = rscs
|
||||
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
||||
return err
|
||||
}
|
||||
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
||||
}
|
||||
if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
|
||||
return fmt.Errorf("missing `url_prefix`")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ui *UserInfo) name() string {
|
||||
if ui.Name != "" {
|
||||
return ui.Name
|
||||
@@ -545,7 +714,8 @@ func (ui *UserInfo) name() string {
|
||||
return ui.Username
|
||||
}
|
||||
if ui.BearerToken != "" {
|
||||
return "bearer_token"
|
||||
h := xxhash.Sum64([]byte(ui.BearerToken))
|
||||
return fmt.Sprintf("bearer_token:hash:%016X", h)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -145,6 +145,12 @@ users:
|
||||
url_map:
|
||||
- src_paths: ["/foo/bar"]
|
||||
`)
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_hosts: ["foobar"]
|
||||
`)
|
||||
|
||||
// Invalid url_prefix in url_map
|
||||
f(`
|
||||
@@ -154,6 +160,13 @@ users:
|
||||
- src_paths: ["/foo/bar"]
|
||||
url_prefix: foo.bar
|
||||
`)
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_hosts: ["foobar"]
|
||||
url_prefix: foo.bar
|
||||
`)
|
||||
|
||||
// empty url_prefix in url_map
|
||||
f(`
|
||||
@@ -163,8 +176,15 @@ users:
|
||||
- src_paths: ['/foo/bar']
|
||||
url_prefix: []
|
||||
`)
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_phosts: ['foobar']
|
||||
url_prefix: []
|
||||
`)
|
||||
|
||||
// Missing src_paths in url_map
|
||||
// Missing src_paths and src_hosts in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
@@ -181,6 +201,15 @@ users:
|
||||
url_prefix: http://foobar
|
||||
`)
|
||||
|
||||
// Invalid regexp in src_hosts
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_hosts: ['fo[obar']
|
||||
url_prefix: http://foobar
|
||||
`)
|
||||
|
||||
// Invalid headers in url_map (missing ':')
|
||||
f(`
|
||||
users:
|
||||
@@ -200,6 +229,14 @@ users:
|
||||
url_prefix: http://foobar
|
||||
headers:
|
||||
aaa: bbb
|
||||
`)
|
||||
// Invalid metric label name
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
url_prefix: http://foo.bar
|
||||
metric_labels:
|
||||
not-prometheus-compatible: value
|
||||
`)
|
||||
}
|
||||
|
||||
@@ -250,6 +287,7 @@ users:
|
||||
- http://node2:343/bbb
|
||||
tls_insecure_skip_verify: false
|
||||
retry_status_codes: [500, 501]
|
||||
load_balancing_policy: first_available
|
||||
drop_src_path_prefix_parts: 1
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("", "foo", "bar"): {
|
||||
@@ -261,7 +299,8 @@ users:
|
||||
}),
|
||||
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
|
||||
RetryStatusCodes: []int{500, 501},
|
||||
DropSrcPathPrefixParts: 1,
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
},
|
||||
})
|
||||
|
||||
@@ -291,6 +330,7 @@ users:
|
||||
- src_paths: ["/api/v1/query","/api/v1/query_range","/api/v1/label/[^./]+/.+"]
|
||||
url_prefix: http://vmselect/select/0/prometheus
|
||||
- src_paths: ["/api/v1/write"]
|
||||
src_hosts: ["foo\\.bar", "baz:1234"]
|
||||
url_prefix: ["http://vminsert1/insert/0/prometheus","http://vminsert2/insert/0/prometheus"]
|
||||
headers:
|
||||
- "foo: bar"
|
||||
@@ -300,11 +340,12 @@ users:
|
||||
BearerToken: "foo",
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
SrcHosts: getRegexs([]string{"foo\\.bar", "baz:1234"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURLs([]string{
|
||||
"http://vminsert1/insert/0/prometheus",
|
||||
"http://vminsert2/insert/0/prometheus",
|
||||
@@ -328,11 +369,12 @@ users:
|
||||
BearerToken: "foo",
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
SrcHosts: getRegexs([]string{"foo\\.bar", "baz:1234"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURLs([]string{
|
||||
"http://vminsert1/insert/0/prometheus",
|
||||
"http://vminsert2/insert/0/prometheus",
|
||||
@@ -394,11 +436,11 @@ users:
|
||||
BearerToken: "foo",
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURLs([]string{
|
||||
"http://vminsert1/insert/0/prometheus",
|
||||
"http://vminsert2/insert/0/prometheus",
|
||||
@@ -426,11 +468,11 @@ users:
|
||||
BearerToken: "foo",
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURLs([]string{
|
||||
"http://vminsert1/insert/0/prometheus",
|
||||
"http://vminsert2/insert/0/prometheus",
|
||||
@@ -455,7 +497,41 @@ users:
|
||||
}),
|
||||
},
|
||||
})
|
||||
|
||||
// With metric_labels
|
||||
f(`
|
||||
users:
|
||||
- username: foo-same
|
||||
password: baz
|
||||
url_prefix: http://foo
|
||||
metric_labels:
|
||||
dc: eu
|
||||
team: dev
|
||||
- username: foo-same
|
||||
password: bar
|
||||
url_prefix: https://bar/x///
|
||||
metric_labels:
|
||||
backend_env: test
|
||||
team: accounting
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("", "foo-same", "baz"): {
|
||||
Username: "foo-same",
|
||||
Password: "baz",
|
||||
URLPrefix: mustParseURL("http://foo"),
|
||||
MetricLabels: map[string]string{
|
||||
"dc": "eu",
|
||||
"team": "dev",
|
||||
},
|
||||
},
|
||||
getAuthToken("", "foo-same", "bar"): {
|
||||
Username: "foo-same",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("https://bar/x"),
|
||||
MetricLabels: map[string]string{
|
||||
"backend_env": "test",
|
||||
"team": "accounting",
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||
@@ -492,6 +568,86 @@ unauthorized_user:
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserInfoGetMetricLabels(t *testing.T) {
|
||||
t.Run("empty-labels", func(t *testing.T) {
|
||||
ui := &UserInfo{
|
||||
Username: "user1",
|
||||
}
|
||||
labels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
labelsExpected := `{username="user1"}`
|
||||
if labels != labelsExpected {
|
||||
t.Fatalf("unexpected labels; got %s; want %s", labels, labelsExpected)
|
||||
}
|
||||
})
|
||||
t.Run("non-empty-username", func(t *testing.T) {
|
||||
ui := &UserInfo{
|
||||
Username: "user1",
|
||||
MetricLabels: map[string]string{
|
||||
"env": "prod",
|
||||
"datacenter": "dc1",
|
||||
},
|
||||
}
|
||||
labels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
labelsExpected := `{datacenter="dc1",env="prod",username="user1"}`
|
||||
if labels != labelsExpected {
|
||||
t.Fatalf("unexpected labels; got %s; want %s", labels, labelsExpected)
|
||||
}
|
||||
})
|
||||
t.Run("non-empty-name", func(t *testing.T) {
|
||||
ui := &UserInfo{
|
||||
Name: "user1",
|
||||
BearerToken: "abc",
|
||||
MetricLabels: map[string]string{
|
||||
"env": "prod",
|
||||
"datacenter": "dc1",
|
||||
},
|
||||
}
|
||||
labels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
labelsExpected := `{datacenter="dc1",env="prod",username="user1"}`
|
||||
if labels != labelsExpected {
|
||||
t.Fatalf("unexpected labels; got %s; want %s", labels, labelsExpected)
|
||||
}
|
||||
})
|
||||
t.Run("non-empty-bearer-token", func(t *testing.T) {
|
||||
ui := &UserInfo{
|
||||
BearerToken: "abc",
|
||||
MetricLabels: map[string]string{
|
||||
"env": "prod",
|
||||
"datacenter": "dc1",
|
||||
},
|
||||
}
|
||||
labels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
labelsExpected := `{datacenter="dc1",env="prod",username="bearer_token:hash:44BC2CF5AD770999"}`
|
||||
if labels != labelsExpected {
|
||||
t.Fatalf("unexpected labels; got %s; want %s", labels, labelsExpected)
|
||||
}
|
||||
})
|
||||
t.Run("invalid-label", func(t *testing.T) {
|
||||
ui := &UserInfo{
|
||||
Username: "foo",
|
||||
MetricLabels: map[string]string{
|
||||
",{": "aaaa",
|
||||
},
|
||||
}
|
||||
_, err := ui.getMetricLabels()
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func isSetBool(boolP *bool, expectedValue bool) bool {
|
||||
if boolP == nil {
|
||||
return false
|
||||
@@ -499,10 +655,10 @@ func isSetBool(boolP *bool, expectedValue bool) bool {
|
||||
return *boolP == expectedValue
|
||||
}
|
||||
|
||||
func getSrcPaths(paths []string) []*SrcPath {
|
||||
var sps []*SrcPath
|
||||
func getRegexs(paths []string) []*Regex {
|
||||
var sps []*Regex
|
||||
for _, path := range paths {
|
||||
sps = append(sps, &SrcPath{
|
||||
sps = append(sps, &Regex{
|
||||
sOriginal: path,
|
||||
re: regexp.MustCompile("^(?:" + path + ")$"),
|
||||
})
|
||||
@@ -550,3 +706,7 @@ func mustParseURLs(us []string) *URLPrefix {
|
||||
bus: bus,
|
||||
}
|
||||
}
|
||||
|
||||
// intp returns a pointer to a newly allocated int holding n.
func intp(n int) *int {
	p := new(int)
	*p = n
	return p
}
|
||||
|
||||
@@ -10,6 +10,11 @@ users:
|
||||
- bearer_token: "XXXX"
|
||||
url_prefix: "http://localhost:8428"
|
||||
|
||||
# Adds labels to the exported metrics for given user section
|
||||
# label name must be prometheus compatible and match regex: `^[a-zA-Z_:.][a-zA-Z0-9_:.]*$`
|
||||
metric_labels:
|
||||
backend_dc: eu
|
||||
access_team: dev
|
||||
# Requests with the 'Authorization: Bearer YYY' header are proxied to http://localhost:8428 ,
|
||||
# The `X-Scope-OrgID: foobar` http header is added to every proxied request.
|
||||
# The `X-Server-Hostname:` http header is removed from the proxied response.
|
||||
@@ -37,14 +42,14 @@ users:
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query?extra_label=team=dev
|
||||
- username: "local-single-node2"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to https://localhost:8428
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to https://localhost/api/v1/query
|
||||
# TLS verification is ignored for https://localhost.
|
||||
- username: "local-single-node-with-tls"
|
||||
password: "***"
|
||||
@@ -92,7 +97,7 @@ users:
|
||||
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
#
|
||||
# Regular expressions are allowed in `src_paths` entries.
|
||||
# Regular expressions are allowed in `src_paths` and `src_hosts` entries.
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
@@ -111,9 +116,9 @@ users:
|
||||
- "http://default1:8888/unsupported_url_handler"
|
||||
- "http://default2:8888/unsupported_url_handler"
|
||||
|
||||
# Requests without Authorization header are routed according to `unauthorized_user` section.
|
||||
# Requests are routed in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the routed requests.
|
||||
# Requests without Authorization header are proxied according to `unauthorized_user` section.
|
||||
# Requests are proxied in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the proxied requests.
|
||||
# The requests are re-tried if url_prefix backends send 500 or 503 response status codes.
|
||||
unauthorized_user:
|
||||
url_prefix:
|
||||
|
||||
@@ -24,7 +24,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
@@ -33,7 +33,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
@@ -45,7 +45,7 @@ var (
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||
"in per-user config")
|
||||
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
@@ -64,7 +64,6 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
logger.Infof("starting vmauth at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
@@ -72,8 +71,10 @@ func main() {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started vmauth in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
|
||||
@@ -88,7 +89,7 @@ func main() {
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
switch r.URL.Path {
|
||||
case "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, *reloadAuthKey, "reloadAuthKey") {
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey.Get(), "reloadAuthKey") {
|
||||
return true
|
||||
}
|
||||
configReloadRequests.Inc()
|
||||
@@ -149,12 +150,20 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
|
||||
// Requests failed because of concurrency limit must be counted as errors,
|
||||
// since this usually means the backend cannot keep up with the current load.
|
||||
ui.backendErrors.Inc()
|
||||
return
|
||||
}
|
||||
default:
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
|
||||
// Requests failed because of concurrency limit must be counted as errors,
|
||||
// since this usually means the backend cannot keep up with the current load.
|
||||
ui.backendErrors.Inc()
|
||||
return
|
||||
}
|
||||
processRequest(w, r, ui)
|
||||
@@ -164,7 +173,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u)
|
||||
isDefault := false
|
||||
if up == nil {
|
||||
if ui.DefaultURL == nil {
|
||||
@@ -180,7 +189,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
httpserver.Errorf(w, r, "missing route for %q", u.String())
|
||||
return
|
||||
}
|
||||
up, hc, retryStatusCodes = ui.DefaultURL, ui.HeadersConf, ui.RetryStatusCodes
|
||||
up, hc = ui.DefaultURL, ui.HeadersConf
|
||||
isDefault = true
|
||||
}
|
||||
maxAttempts := up.getBackendsCount()
|
||||
@@ -190,17 +199,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
}
|
||||
}
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
bu := up.getLeastLoadedBackendURL()
|
||||
bu := up.getBackendURL()
|
||||
targetURL := bu.url
|
||||
// Don't change path and add request_path query param for default route.
|
||||
if isDefault {
|
||||
query := targetURL.Query()
|
||||
query.Set("request_path", u.Path)
|
||||
query.Set("request_path", u.String())
|
||||
targetURL.RawQuery = query.Encode()
|
||||
} else { // Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts)
|
||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
|
||||
}
|
||||
ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport)
|
||||
ok := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
|
||||
bu.put()
|
||||
if ok {
|
||||
return
|
||||
@@ -212,15 +221,16 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, transport *http.Transport) bool {
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) bool {
|
||||
// This code has been copied from net/http/httputil/reverseproxy.go
|
||||
req := sanitizeRequestHeaders(r)
|
||||
req.URL = targetURL
|
||||
req.Host = targetURL.Host
|
||||
updateHeadersByConfig(req.Header, hc.RequestHeaders)
|
||||
res, err := transport.RoundTrip(req)
|
||||
res, err := ui.httpTransport.RoundTrip(req)
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
@@ -228,15 +238,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Timed out request must be counted as errors, since this usually means that the backend is slow.
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
return true
|
||||
}
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// Request body cannot be re-sent to another backend. Return the error to the client then.
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot proxy the request to %q: %w", targetURL, err),
|
||||
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
return true
|
||||
}
|
||||
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||
@@ -246,7 +261,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
|
||||
return false
|
||||
}
|
||||
if (rtbOK && rtb.canRetry()) && hasInt(retryStatusCodes, res.StatusCode) {
|
||||
if hasInt(retryStatusCodes, res.StatusCode) {
|
||||
_ = res.Body.Close()
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
||||
// we consider such a request an error as well.
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
|
||||
res.StatusCode, targetURL),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
return true
|
||||
}
|
||||
// Retry requests at other backends if it matches retryStatusCodes.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
@@ -265,6 +293,7 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
|
||||
copyBufPool.Put(copyBuf)
|
||||
_ = res.Body.Close()
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
@@ -392,8 +421,10 @@ func getTransport(insecureSkipVerifyP *bool, caFile string) (*http.Transport, er
|
||||
return tr, nil
|
||||
}
|
||||
|
||||
var transportMap = make(map[string]*http.Transport)
|
||||
var transportMapLock sync.Mutex
|
||||
var (
|
||||
transportMap = make(map[string]*http.Transport)
|
||||
transportMapLock sync.Mutex
|
||||
)
|
||||
|
||||
func appendTransportKey(dst []byte, insecureSkipVerify bool, caFile string) []byte {
|
||||
dst = encoding.MarshalBool(dst, insecureSkipVerify)
|
||||
@@ -421,7 +452,7 @@ func newTransport(insecureSkipVerify bool, caFile string) (*http.Transport, erro
|
||||
tlsCfg.ClientSessionCache = tls.NewLRUClientSessionCache(0)
|
||||
tlsCfg.InsecureSkipVerify = insecureSkipVerify
|
||||
if caFile != "" {
|
||||
data, err := fs.ReadFileOrHTTP(caFile)
|
||||
data, err := fscore.ReadFileOrHTTP(caFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read tls_ca_file: %w", err)
|
||||
}
|
||||
|
||||
@@ -49,18 +49,28 @@ func dropPrefixParts(path string, parts int) string {
|
||||
return path
|
||||
}
|
||||
|
||||
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) {
|
||||
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf) {
|
||||
for _, e := range ui.URLMaps {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts
|
||||
}
|
||||
if matchAnyRegex(e.SrcHosts, u.Host) && matchAnyRegex(e.SrcPaths, u.Path) {
|
||||
return e.URLPrefix, e.HeadersConf
|
||||
}
|
||||
}
|
||||
if ui.URLPrefix != nil {
|
||||
return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts
|
||||
return ui.URLPrefix, ui.HeadersConf
|
||||
}
|
||||
return nil, HeadersConf{}, nil, 0
|
||||
return nil, HeadersConf{}
|
||||
}
|
||||
|
||||
func matchAnyRegex(rs []*Regex, s string) bool {
|
||||
if len(rs) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, r := range rs {
|
||||
if r.match(s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func normalizeURL(uOrig *url.URL) *url.URL {
|
||||
|
||||
@@ -79,19 +79,22 @@ func TestDropPrefixParts(t *testing.T) {
|
||||
|
||||
func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string,
|
||||
expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) {
|
||||
expectedRetryStatusCodes []int, expectedLoadBalancingPolicy string, expectedDropSrcPathPrefixParts int) {
|
||||
t.Helper()
|
||||
if err := ui.initURLs(); err != nil {
|
||||
t.Fatalf("cannot initialize urls inside UserInfo: %s", err)
|
||||
}
|
||||
u, err := url.Parse(requestURI)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
u = normalizeURL(u)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u)
|
||||
if up == nil {
|
||||
t.Fatalf("cannot determie backend: %s", err)
|
||||
}
|
||||
bu := up.getLeastLoadedBackendURL()
|
||||
target := mergeURLs(bu.url, u, dropSrcPathPrefixParts)
|
||||
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
|
||||
bu.put()
|
||||
if target.String() != expectedTarget {
|
||||
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
|
||||
@@ -100,17 +103,20 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
if headersStr != expectedRequestHeaders {
|
||||
t.Fatalf("unexpected request headers; got %s; want %s", headersStr, expectedRequestHeaders)
|
||||
}
|
||||
if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) {
|
||||
t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes)
|
||||
if !reflect.DeepEqual(up.retryStatusCodes, expectedRetryStatusCodes) {
|
||||
t.Fatalf("unexpected retryStatusCodes; got %d; want %d", up.retryStatusCodes, expectedRetryStatusCodes)
|
||||
}
|
||||
if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
|
||||
t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
|
||||
if up.loadBalancingPolicy != expectedLoadBalancingPolicy {
|
||||
t.Fatalf("unexpected loadBalancingPolicy; got %q; want %q", up.loadBalancingPolicy, expectedLoadBalancingPolicy)
|
||||
}
|
||||
if up.dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
|
||||
t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", up.dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
|
||||
}
|
||||
}
|
||||
// Simple routing with `url_prefix`
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "", "http://foo.bar/.", "[]", "[]", nil, 0)
|
||||
}, "", "http://foo.bar/.", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
HeadersConf: HeadersConf{
|
||||
@@ -120,29 +126,31 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
}},
|
||||
},
|
||||
RetryStatusCodes: []int{503, 501},
|
||||
DropSrcPathPrefixParts: 2,
|
||||
}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2)
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, "first_available", 2)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/federate"),
|
||||
}, "/", "http://foo.bar/federate", "[]", "[]", nil, 0)
|
||||
}, "/", "http://foo.bar/federate", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0)
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0)
|
||||
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0)
|
||||
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0)
|
||||
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, "least_loaded", 0)
|
||||
|
||||
// Complex routing with `url_map`
|
||||
ui := &UserInfo{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/vmsingle/api/v1/query"}),
|
||||
SrcHosts: getRegexs([]string{"host42"}),
|
||||
SrcPaths: getRegexs([]string{"/vmsingle/api/v1/query"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
|
||||
HeadersConf: HeadersConf{
|
||||
RequestHeaders: []Header{
|
||||
@@ -163,11 +171,14 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{503, 500, 501},
|
||||
DropSrcPathPrefixParts: 1,
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
RetryStatusCodes: []int{},
|
||||
DropSrcPathPrefixParts: intp(0),
|
||||
},
|
||||
},
|
||||
URLPrefix: mustParseURL("http://default-server"),
|
||||
@@ -182,37 +193,46 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
}},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: 2,
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
}
|
||||
f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
|
||||
f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2)
|
||||
f(ui, "http://host42/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up",
|
||||
`[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, "first_available", 1)
|
||||
f(ui, "http://host123/vmsingle/api/v1/query?query=up", "http://default-server/v1/query?query=up",
|
||||
`[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, "least_loaded", 2)
|
||||
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", []int{}, "least_loaded", 0)
|
||||
f(ui, "https://foo-host/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, "least_loaded", 2)
|
||||
|
||||
// Complex routing regexp paths in `url_map`
|
||||
ui = &UserInfo{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query(_range)?", "/api/v1/label/[^/]+/values"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query(_range)?", "/api/v1/label/[^/]+/values"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
},
|
||||
{
|
||||
SrcHosts: getRegexs([]string{"vmui\\..+"}),
|
||||
URLPrefix: mustParseURL("http://vmui.host:1234/vmui/"),
|
||||
},
|
||||
},
|
||||
URLPrefix: mustParseURL("http://default-server"),
|
||||
}
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(ui, "https://vmui.foobar.com/a/b?c=d", "http://vmui.host:1234/vmui/a/b?c=d", "[]", "[]", nil, "least_loaded", 0)
|
||||
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
|
||||
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0)
|
||||
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
|
||||
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0)
|
||||
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, "least_loaded", 0)
|
||||
}
|
||||
|
||||
func TestCreateTargetURLFailure(t *testing.T) {
|
||||
@@ -223,7 +243,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
u = normalizeURL(u)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u)
|
||||
if up != nil {
|
||||
t.Fatalf("unexpected non-empty up=%#v", up)
|
||||
}
|
||||
@@ -233,18 +253,12 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
||||
if hc.ResponseHeaders != nil {
|
||||
t.Fatalf("unexpected non-empty response headers=%q", hc.ResponseHeaders)
|
||||
}
|
||||
if retryStatusCodes != nil {
|
||||
t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes)
|
||||
}
|
||||
if dropSrcPathPrefixParts != 0 {
|
||||
t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts)
|
||||
}
|
||||
}
|
||||
f(&UserInfo{}, "/foo/bar")
|
||||
f(&UserInfo{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
SrcPaths: getRegexs([]string{"/api/v1/query"}),
|
||||
URLPrefix: mustParseURL("http://foobar/baz"),
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,444 +1,3 @@
|
||||
# vmbackup
|
||||
See vmbackup docs [here](https://docs.victoriametrics.com/vmbackup.html).
|
||||
|
||||
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
|
||||
`vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
|
||||
Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
|
||||
data between the existing backup and new backup. It saves time and costs on data transfer.
|
||||
|
||||
Backup process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmbackup` with the same args.
|
||||
|
||||
Backed up data can be restored with [vmrestore](https://docs.victoriametrics.com/vmrestore.html).
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
See also [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
## Supported storage types
|
||||
|
||||
`vmbackup` supports the following `-dst` storage types:
|
||||
|
||||
* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
|
||||
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
|
||||
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `azblob://<container>/<path/to/backup>`
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` refuses to store the backup in the directory pointed to by the `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
|
||||
|
||||
## Use cases
|
||||
|
||||
### Regular backups
|
||||
|
||||
Regular backup can be performed with the following command:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
|
||||
```
|
||||
|
||||
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
|
||||
There is no need to stop VictoriaMetrics for creating backups since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
* `http://localhost:8428/snapshot/create` is the URL for creating snapshots according to [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots). `vmbackup` creates a snapshot by querying the provided `-snapshot.createURL`, then performs the backup and then automatically removes the created snapshot.
|
||||
* `<bucket>` is the name of an already existing [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
|
||||
* `<path/to/new/backup>` is the destination path where new backup will be placed.
|
||||
|
||||
### Regular backups with server-side copy from existing backup
|
||||
|
||||
If the destination GCS bucket already contains the previous backup at `-origin` path, then new backup can be sped up
|
||||
with the following command:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup> -origin=gs://<bucket>/<path/to/existing/backup>
|
||||
```
|
||||
|
||||
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
|
||||
|
||||
### Incremental backups
|
||||
|
||||
Incremental backups are performed if `-dst` points to an already existing backup. In this case only new data is uploaded to remote storage.
|
||||
It saves time and network bandwidth costs when working with big backups:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/existing/backup>
|
||||
```
|
||||
|
||||
### Smart backups
|
||||
|
||||
Smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
|
||||
|
||||
* Run the following command every hour:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/latest
|
||||
```
|
||||
|
||||
Where `<latest-snapshot>` is the latest [snapshot](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
The command will upload only changed data to `gs://<bucket>/latest`.
|
||||
|
||||
* Run the following command once a day:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<YYYYMMDD> -origin=gs://<bucket>/latest
|
||||
```
|
||||
|
||||
Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
|
||||
|
||||
This approach saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (`latest` backup)
|
||||
or from any day (`YYYYMMDD` backups). Note that hourly backup shouldn't run when creating daily backup.
|
||||
|
||||
Do not forget to remove old backups when they are no longer needed in order to save storage costs.
|
||||
|
||||
See also [vmbackupmanager tool](https://docs.victoriametrics.com/vmbackupmanager.html) for automating smart backups.
|
||||
|
||||
### Server-side copy of the existing backup
|
||||
|
||||
Sometimes it is needed to make server-side copy of the existing backup. This can be done by specifying the source backup path via `-origin` command-line flag,
|
||||
while the destination path for backup copy must be specified via `-dst` command-line flag. For example, the following command copies backup
|
||||
from `gs://bucket/foo` to `gs://bucket/bar`:
|
||||
|
||||
```console
|
||||
./vmbackup -origin=gs://bucket/foo -dst=gs://bucket/bar
|
||||
```
|
||||
|
||||
The `-origin` and `-dst` must point to the same object storage bucket or to the same filesystem.
|
||||
|
||||
The server-side backup copy is usually performed at a much faster speed compared to the usual backup, since backup data isn't transferred
|
||||
between the remote storage and locally running `vmbackup` tool.
|
||||
|
||||
If the `-dst` already contains some data, then its contents are synced with the `-origin` data. This allows making incremental server-side copies of backups.
|
||||
|
||||
## How does it work?
|
||||
|
||||
The backup algorithm is the following:
|
||||
|
||||
1. Create a snapshot by querying the provided `-snapshot.createURL`
|
||||
1. Collect information about files in the created snapshot, in the `-dst` and in the `-origin`.
|
||||
1. Determine which files in `-dst` are missing in the created snapshot, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
|
||||
1. Determine which files in the created snapshot are missing in `-dst`. These are usually small new files and bigger merged files.
|
||||
1. Determine which files from step 4 exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
|
||||
These are usually the biggest and the oldest files, which are shared between backups.
|
||||
1. Upload the remaining files from step 4 from the created snapshot to `-dst`.
|
||||
1. Delete the created snapshot.
|
||||
|
||||
The algorithm splits source files into 1 GiB chunks in the backup. Each chunk is stored as a separate file in the backup.
|
||||
Such splitting balances between the number of files in the backup and the amount of data that needs to be re-transferred after temporary errors.
|
||||
|
||||
`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
|
||||
|
||||
* All the files in the snapshot are immutable.
|
||||
* Old files are periodically merged into new files.
|
||||
* Smaller files have higher probability to be merged.
|
||||
* Consecutive snapshots share many identical files.
|
||||
|
||||
These properties allow performing fast and cheap incremental backups and server-side copying from `-origin` paths.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
`vmbackup` can work improperly or slowly when these properties are violated.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
|
||||
* If `vmbackup` eats all the network bandwidth or CPU, then either decrease the `-concurrency` command-line flag value or set `-maxBytesPerSecond` command-line flag value to lower value.
|
||||
* If `vmbackup` consumes all the CPU on systems with big number of CPU cores, then try running it with `-filestream.disableFadvise` command-line flag.
|
||||
* If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process.
|
||||
* Backups created from [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html) cannot be restored
|
||||
at [cluster VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) and vice versa.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
|
||||
### Providing credentials as a file
|
||||
|
||||
Obtaining credentials from a file.
|
||||
|
||||
Add flag `-credsFilePath=/etc/credentials` with the following content:
|
||||
|
||||
- for S3 (AWS, MinIO or other S3 compatible storages):
|
||||
|
||||
```console
|
||||
[default]
|
||||
aws_access_key_id=theaccesskey
|
||||
aws_secret_access_key=thesecretaccesskeyvalue
|
||||
```
|
||||
|
||||
- for GCP cloud storage:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "project-id",
|
||||
"private_key_id": "key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "service-account-email",
|
||||
"client_id": "client-id",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://accounts.google.com/o/oauth2/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account-email"
|
||||
}
|
||||
```
|
||||
|
||||
### Providing credentials via env variables
|
||||
|
||||
Obtaining credentials from env variables.
|
||||
- For AWS S3 compatible storages set env variable `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
|
||||
Also you can set env variable `AWS_SHARED_CREDENTIALS_FILE` with path to credentials file.
|
||||
- For GCE cloud storage set env variable `GOOGLE_APPLICATION_CREDENTIALS` with path to credentials file.
|
||||
- For Azure storage either set env variables `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`, or `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`.
|
||||
|
||||
Please, note that `vmbackup` will use credentials provided by cloud providers metadata service [when applicable](https://docs.victoriametrics.com/vmbackup.html#using-cloud-providers-metadata-service).
|
||||
|
||||
### Using cloud providers metadata service
|
||||
|
||||
`vmbackup` and `vmbackupmanager` will automatically use cloud providers metadata service in order to obtain credentials if they are running in cloud environment
|
||||
and credentials are not explicitly provided via flags or env variables.
|
||||
|
||||
### Providing credentials in Kubernetes
|
||||
|
||||
The simplest way to provide credentials in Kubernetes is to use [Secrets](https://kubernetes.io/docs/concepts/configuration/secret/)
|
||||
and inject them into the pod as environment variables. For example, the following secret can be used for AWS S3 credentials:
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: vmbackup-credentials
|
||||
data:
|
||||
access_key: key
|
||||
secret_key: secret
|
||||
```
|
||||
And then it can be injected into the pod as environment variables:
|
||||
```yaml
|
||||
...
|
||||
env:
|
||||
- name: AWS_ACCESS_KEY_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: access_key
|
||||
name: vmbackup-credentials
|
||||
- name: AWS_SECRET_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: secret_key
|
||||
name: vmbackup-credentials
|
||||
...
|
||||
```
|
||||
|
||||
A more secure way is to use IAM roles to provide tokens for pods instead of managing credentials manually.
|
||||
|
||||
For AWS deployments it will be required to configure [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html).
|
||||
In order to use IAM roles for service accounts with `vmbackup` or `vmbackupmanager` it is required to create ServiceAccount with IAM role mapping:
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: monitoring-backups
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: arn:aws:iam::{ACCOUNT_ID}:role/{ROLE_NAME}
|
||||
```
|
||||
And [configure pod to use service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
|
||||
After this `vmbackup` and `vmbackupmanager` will automatically use IAM role for service account in order to obtain credentials.
|
||||
|
||||
For GCP deployments it will be required to configure [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity).
|
||||
In order to use Workload Identity with `vmbackup` or `vmbackupmanager` it is required to create ServiceAccount with Workload Identity annotation:
|
||||
```yaml
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: monitoring-backups
|
||||
annotations:
|
||||
iam.gke.io/gcp-service-account: {sa_name}@{project_name}.iam.gserviceaccount.com
|
||||
```
|
||||
And [configure pod to use service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
|
||||
After this `vmbackup` and `vmbackupmanager` will automatically use Workload Identity for service account in order to obtain credentials.
|
||||
|
||||
### Using custom S3 endpoint
|
||||
|
||||
Usage with s3 custom url endpoint. It is possible to use `vmbackup` with s3 compatible storages like minio, cloudian, etc.
|
||||
You have to add a custom url endpoint via flag:
|
||||
|
||||
- for MinIO
|
||||
```console
|
||||
-customS3Endpoint=http://localhost:9000
|
||||
```
|
||||
|
||||
- for aws gov region
|
||||
```console
|
||||
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
|
||||
```
|
||||
|
||||
### Permanent deletion of objects in S3 and compatible storages
|
||||
|
||||
`vmbackup` and [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) use standard delete operation
|
||||
for S3-compatible object storage when performing [incremental backups](#incremental-backups).
|
||||
This operation removes only the current version of the object. This works OK in most cases.
|
||||
|
||||
Sometimes it is needed to remove all the versions of an object. In this case pass `-deleteAllObjectVersions` command-line flag to `vmbackup`.
|
||||
|
||||
Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically.
|
||||
Refer to the respective documentation for your object storage provider for more details.
|
||||
|
||||
### Command-line flags
|
||||
|
||||
Run `vmbackup -help` in order to see all the available options:
|
||||
|
||||
```console
|
||||
-concurrency int
|
||||
The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
|
||||
-configFilePath string
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-configProfile string
|
||||
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used
|
||||
-credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-deleteAllObjectVersions
|
||||
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
|
||||
-dst string
|
||||
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
|
||||
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address for exporting metrics at /metrics page (default ":8420")
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
Lisense key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxBytesPerSecond size
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-origin string
|
||||
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-s3ForcePathStyle
|
||||
Prefixing endpoint with bucket name when set false, true by default. (default true)
|
||||
-s3StorageClass string
|
||||
The Storage Class applied to objects uploaded to AWS S3. Supported values are: GLACIER, DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, REDUCED_REDUNDANCY, STANDARD, STANDARD_IA.
|
||||
See https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html
|
||||
-snapshot.createURL string
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create . There is no need in setting -snapshotName if -snapshot.createURL is set
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-snapshotName string
|
||||
Name for the snapshot to backup. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots. There is no need in setting -snapshotName if -snapshot.createURL is set
|
||||
-storageDataPath string
|
||||
Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
|
||||
1. Run `make vmbackup` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
1. Run `make vmbackup-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmbackup-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```console
|
||||
ROOT_IMAGE=scratch make package-vmbackup
|
||||
```
|
||||
vmbackup docs can be edited at [docs/vmbackup.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmbackup.md).
|
||||
|
||||
@@ -47,7 +47,6 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
// Storing snapshot delete function to be able to call it in case
|
||||
// of error since logger.Fatal will exit the program without
|
||||
@@ -96,11 +95,13 @@ func main() {
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, false, nil)
|
||||
|
||||
pushmetrics.Init()
|
||||
err := makeBackup()
|
||||
deleteSnapshot()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create backup: %s", err)
|
||||
}
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime := time.Now()
|
||||
logger.Infof("gracefully shutting down http server for metrics at %q", *httpListenAddr)
|
||||
|
||||
@@ -1,554 +1,3 @@
|
||||
# vmbackupmanager
|
||||
See vmbackupmanager docs [here](https://docs.victoriametrics.com/vmbackupmanager.html).
|
||||
|
||||
***vmbackupmanager is a part of [enterprise package](https://docs.victoriametrics.com/enterprise.html).
|
||||
It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).***
|
||||
|
||||
The VictoriaMetrics backup manager automates regular backup procedures. It supports the following backup intervals: **hourly**, **daily**, **weekly** and **monthly**.
|
||||
Multiple backup intervals may be configured simultaneously. I.e. the backup manager creates hourly backups every hour, while it creates daily backups every day, etc.
|
||||
Backup manager must have read access to the storage data, so best practice is to install it on the same machine (or as a sidecar) where the storage node is installed.
|
||||
The backup service makes a backup every hour and puts it to the latest folder and then copies data to the folders
|
||||
which represent the backup intervals (hourly, daily, weekly and monthly)
|
||||
|
||||
The required flags for running the service are as follows:
|
||||
|
||||
* -eula - should be true and means that you have the legal right to run a backup manager. That can either be a signed contract or an email
|
||||
with confirmation to run the service in a trial period.
|
||||
* -storageDataPath - path to VictoriaMetrics or vmstorage data path to make backup from.
|
||||
* -snapshot.createURL - VictoriaMetrics create snapshot URL. A snapshot is created automatically via this URL during backup. Example: <http://victoriametrics:8428/snapshot/create>
|
||||
* -dst - backup destination at [the supported storage types](https://docs.victoriametrics.com/vmbackup.html#supported-storage-types).
|
||||
* -credsFilePath - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See [https://cloud.google.com/iam/docs/creating-managing-service-account-keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys)
|
||||
and [https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html](https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html).
|
||||
|
||||
Backup schedule is controlled by the following flags:
|
||||
|
||||
* -disableHourly - disable hourly run. Default false
|
||||
* -disableDaily - disable daily run. Default false
|
||||
* -disableWeekly - disable weekly run. Default false
|
||||
* -disableMonthly - disable monthly run. Default false
|
||||
|
||||
By default, all intervals are enabled and Backup Manager backs up data every hour for every interval (hourly, daily, weekly and monthly).
|
||||
|
||||
The backup manager creates the following directory hierarchy at **-dst**:
|
||||
|
||||
* /latest/ - contains the latest backup
|
||||
* /hourly/ - contains hourly backups. Each backup is named as *YYYY-MM-DD:HH*
|
||||
* /daily/ - contains daily backups. Each backup is named as *YYYY-MM-DD*
|
||||
* /weekly/ - contains weekly backups. Each backup is named as *YYYY-WW*
|
||||
* /monthly/ - contains monthly backups. Each backup is named as *YYYY-MM*
|
||||
|
||||
To get the full list of supported flags please run the following command:
|
||||
|
||||
```console
|
||||
./vmbackupmanager --help
|
||||
```
|
||||
|
||||
The service creates a **full** backup each run. This means that the system can be restored fully
|
||||
from any particular backup using [vmrestore](https://docs.victoriametrics.com/vmrestore.html).
|
||||
Backup manager uploads only the data that has been changed or created since the most recent backup (incremental backup).
|
||||
This reduces the consumed network traffic and the time needed for performing the backup.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for details.
|
||||
|
||||
*Please take into account that the first backup upload could take a significant amount of time as it needs to upload all the data.*
|
||||
|
||||
There are two flags which could help with performance tuning:
|
||||
|
||||
* -maxBytesPerSecond - the maximum upload speed. There is no limit if it is set to 0
|
||||
* -concurrency - The number of concurrent workers. Higher concurrency may improve upload speed (default 10)
|
||||
|
||||
## Example of Usage
|
||||
|
||||
GCS and cluster version. You need to have a credentials file in JSON format with the following structure:
|
||||
|
||||
credentials.json
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "<project>",
|
||||
"private_key_id": "",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "test@<project>.iam.gserviceaccount.com",
|
||||
"client_id": "",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%40<project>.iam.gserviceaccount.com"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Backup manager launched with the following configuration:
|
||||
|
||||
```console
|
||||
export NODE_IP=192.168.0.10
|
||||
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
|
||||
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -eula
|
||||
```
|
||||
|
||||
Expected logs in vmbackupmanager:
|
||||
|
||||
```console
|
||||
info lib/backup/actions/backup.go:131 server-side copied 81 out of 81 parts from GCS{bucket: "vmstorage-data", dir: "192.168.0.10//latest/"} to GCS{bucket: "vmstorage-data", dir: "192.168.0.10//weekly/2020-34/"} in 2.549833008s
|
||||
info lib/backup/actions/backup.go:169 backed up 853315 bytes in 2.882 seconds; deleted 0 bytes; server-side copied 853315 bytes; uploaded 0 bytes
|
||||
```
|
||||
|
||||
Expected logs in vmstorage:
|
||||
|
||||
```console
|
||||
info VictoriaMetrics/lib/storage/table.go:146 creating table snapshot of "/vmstorage-data/data"...
|
||||
info VictoriaMetrics/lib/storage/storage.go:311 deleting snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA"...
|
||||
info VictoriaMetrics/lib/storage/storage.go:319 deleted snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA" in 0.169 seconds
|
||||
```
|
||||
|
||||
The result on the GCS bucket
|
||||
|
||||
* The root folder
|
||||
|
||||
<img alt="root folder" src="vmbackupmanager_root_folder.png">
|
||||
|
||||
* The latest folder
|
||||
|
||||
<img alt="latest folder" src="vmbackupmanager_latest_folder.png">
|
||||
|
||||
Please, see [vmbackup docs](https://docs.victoriametrics.com/vmbackup.html#advanced-usage) for more examples of authentication with different
|
||||
storage types.
|
||||
|
||||
## Backup Retention Policy
|
||||
|
||||
Backup retention policy is controlled by:
|
||||
|
||||
* -keepLastHourly - keep the last N hourly backups. Disabled by default
|
||||
* -keepLastDaily - keep the last N daily backups. Disabled by default
|
||||
* -keepLastWeekly - keep the last N weekly backups. Disabled by default
|
||||
* -keepLastMonthly - keep the last N monthly backups. Disabled by default
|
||||
|
||||
> *Note*: a value of 0 in any keepLast flag results in deletion of ALL backups of the corresponding type (hourly, daily, weekly or monthly)
|
||||
|
||||
> *Note*: retention policy does not enforce removing previous versions of objects in object storages if versioning is enabled. See [these docs](https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-and-compatible-storages) for more details.
|
||||
|
||||
Let’s assume we have a backup manager collecting daily backups for the past 10 days.
|
||||
|
||||
<img alt="retention policy daily before retention cycle" src="vmbackupmanager_rp_daily_1.png">
|
||||
|
||||
We enable the backup retention policy for backup manager by using the following configuration:
|
||||
|
||||
```console
|
||||
export NODE_IP=192.168.0.10
|
||||
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
|
||||
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create \
|
||||
-keepLastDaily=3 -eula
|
||||
```
|
||||
|
||||
Expected logs in backup manager on start:
|
||||
|
||||
```console
|
||||
info lib/logger/flag.go:20 flag "keepLastDaily" = "3"
|
||||
```
|
||||
|
||||
Expected logs in backup manager during retention cycle:
|
||||
|
||||
```console
|
||||
info app/vmbackupmanager/retention.go:106 daily backups to delete [daily/2021-02-13 daily/2021-02-12 daily/2021-02-11 daily/2021-02-10 daily/2021-02-09 daily/2021-02-08 daily/2021-02-07]
|
||||
```
|
||||
|
||||
The result on the GCS bucket. We see only 3 daily backups:
|
||||
|
||||
<img alt="retention policy daily after retention cycle" src="vmbackupmanager_rp_daily_2.png">
|
||||
|
||||
### Protection backups against deletion by retention policy
|
||||
|
||||
You can protect any backup against deletion by retention policy with the `vmbackupmanager backup lock` command.
|
||||
|
||||
For instance:
|
||||
|
||||
```console
|
||||
./vmbackupmanager backup lock daily/2021-02-13 -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
|
||||
```
|
||||
|
||||
After that the backup won't be deleted by retention policy.
|
||||
You can view the `locked` attribute in backup list:
|
||||
|
||||
```console
|
||||
./vmbackupmanager backup list -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
|
||||
```
|
||||
|
||||
To remove protection, you can use the command `vmbackupmanager backup unlock`.
|
||||
|
||||
For example:
|
||||
|
||||
```console
|
||||
./vmbackupmanager backup unlock daily/2021-02-13 -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
|
||||
```
|
||||
|
||||
## API methods
|
||||
|
||||
`vmbackupmanager` exposes the following API methods:
|
||||
|
||||
* GET `/api/v1/backups` - returns list of backups in remote storage.
|
||||
Example output:
|
||||
```json
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
> Note: `created_at` field is in RFC3339 format.
|
||||
|
||||
* GET `/api/v1/backups/<BACKUP_NAME>` - returns backup info by name.
|
||||
Example output:
|
||||
```json
|
||||
{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00","locked":true}
|
||||
```
|
||||
|
||||
* PUT `/api/v1/backups/<BACKUP_NAME>` - update "locked" attribute for backup by name.
|
||||
Example request body:
|
||||
```json
|
||||
{"locked":true}
|
||||
```
|
||||
Example response:
|
||||
```json
|
||||
{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00", "locked": true}
|
||||
```
|
||||
|
||||
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
|
||||
Example request body:
|
||||
```json
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
|
||||
* GET `/api/v1/restore` - returns backup name from restore mark if it exists.
|
||||
Example response:
|
||||
```json
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
|
||||
* DELETE `/api/v1/restore` - delete restore mark.
|
||||
|
||||
## CLI
|
||||
|
||||
`vmbackupmanager` exposes CLI commands to work with [API methods](#api-methods) without external dependencies.
|
||||
|
||||
Supported commands:
|
||||
```console
|
||||
vmbackupmanager backup
|
||||
|
||||
vmbackupmanager backup list
|
||||
List backups in remote storage
|
||||
|
||||
vmbackupmanager backup lock
|
||||
Locks backup in remote storage against deletion
|
||||
|
||||
vmbackupmanager backup unlock
|
||||
Unlocks backup in remote storage for deletion
|
||||
|
||||
vmbackupmanager restore
|
||||
Restore backup specified by restore mark if it exists
|
||||
|
||||
vmbackupmanager restore get
|
||||
Get restore mark if it exists
|
||||
|
||||
vmbackupmanager restore delete
|
||||
Delete restore mark if it exists
|
||||
|
||||
vmbackupmanager restore create [backup_name]
|
||||
Create restore mark
|
||||
```
|
||||
|
||||
By default, CLI commands are using `http://127.0.0.1:8300` endpoint to reach `vmbackupmanager` API.
|
||||
It can be changed by using flag:
|
||||
```
|
||||
-apiURL string
|
||||
vmbackupmanager address to perform API requests (default "http://127.0.0.1:8300")
|
||||
```
|
||||
|
||||
### Backup commands
|
||||
|
||||
`vmbackupmanager backup list` lists backups in remote storage:
|
||||
```console
|
||||
$ ./vmbackupmanager backup list
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
|
||||
### Restore commands
|
||||
|
||||
Restore commands are used to create, get and delete restore mark.
|
||||
Restore mark is used by `vmbackupmanager` to store backup name to restore when running restore.
|
||||
|
||||
|
||||
Create restore mark:
|
||||
```console
|
||||
$ ./vmbackupmanager restore create daily/2022-10-06
|
||||
```
|
||||
|
||||
Get restore mark if it exists:
|
||||
```console
|
||||
$ ./vmbackupmanager restore get
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
|
||||
Delete restore mark if it exists:
|
||||
```console
|
||||
$ ./vmbackupmanager restore delete
|
||||
```
|
||||
|
||||
Perform restore:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
```
|
||||
Note that `vmsingle` or `vmstorage` should be stopped before performing restore.
|
||||
|
||||
If restore mark doesn't exist at `storageDataPath` (restore wasn't requested), `vmbackupmanager restore` will exit with successful status code.
|
||||
|
||||
### How to restore backup via CLI
|
||||
|
||||
1. Run `vmbackupmanager backup list` to get list of available backups:
|
||||
```console
|
||||
$ /vmbackupmanager-prod backup list
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
1. Run `vmbackupmanager restore create` to create restore mark:
|
||||
- Use relative path to backup to restore from currently used remote storage:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create daily/2023-04-07
|
||||
```
|
||||
- Use full path to backup to restore from any remote storage:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/daily/2023-04-07
|
||||
```
|
||||
1. Stop `vmstorage` or `vmsingle` node
|
||||
1. Run `vmbackupmanager restore` to restore backup:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
```
|
||||
1. Start `vmstorage` or `vmsingle` node
|
||||
|
||||
|
||||
### How to restore in Kubernetes
|
||||
|
||||
1. Ensure there is an init container with `vmbackupmanager restore` in `vmstorage` or `vmsingle` pod.
|
||||
For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) deployments it is required to add:
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart:
|
||||
enabled: "true"
|
||||
```
|
||||
See operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api.html#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api.html#vmsinglespec).
|
||||
1. Enter container running `vmbackupmanager`
|
||||
1. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
```console
|
||||
$ /vmbackupmanager-prod backup list
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
1. Use `vmbackupmanager restore create` to create restore mark:
|
||||
- Use relative path to backup to restore from currently used remote storage:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create daily/2023-04-07
|
||||
```
|
||||
- Use full path to backup to restore from any remote storage:
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/daily/2023-04-07
|
||||
```
|
||||
1. Restart pod
|
||||
|
||||
#### Restore cluster into another cluster
|
||||
|
||||
These steps are assuming that [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) is used to manage `VMCluster`.
|
||||
Clusters here are referred to as `source` and `destination`.
|
||||
|
||||
1. Create a new cluster with access to *source* cluster `vmbackupmanager` storage and same number of storage nodes.
|
||||
Add the following section in order to enable restore on start (operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api.html#vmstorage)):
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart:
|
||||
enabled: "true"
|
||||
```
|
||||
Note: it is safe to leave this section in the cluster configuration, since it will be ignored if restore mark doesn't exist.
|
||||
> Important! Use different `-dst` for *destination* cluster to avoid overwriting backup data of the *source* cluster.
|
||||
1. Enter container running `vmbackupmanager` in *source* cluster
|
||||
1. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
```console
|
||||
$ /vmbackupmanager-prod backup list
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
1. Use `vmbackupmanager restore create` to create restore mark at each pod of the *destination* cluster.
|
||||
Each pod in *destination* cluster should be restored from backup of respective pod in *source* cluster.
|
||||
For example: `vmstorage-destination-0` in *destination* cluster should be restored from `vmstorage-source-0` in *source* cluster.
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/daily/2023-04-07
|
||||
```
|
||||
1. Restart `vmstorage` pods of *destination* cluster. On pod start `vmbackupmanager` will restore data from the specified backup.
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vmbackupmanager` exports various metrics in Prometheus exposition format at `http://vmbackupmanager:8300/metrics` page. It is recommended to set up regular scraping of this page
|
||||
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/17798-victoriametrics-backupmanager/) for `vmbackupmanager` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Flags
|
||||
|
||||
Pass `-help` to `vmbackupmanager` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
|
||||
```
|
||||
vmbackupmanager performs regular backups according to the provided configs.
|
||||
|
||||
subcommands:
|
||||
backup: provides auxiliary backup-related commands
|
||||
restore: restores backup specified by restore mark if it exists
|
||||
|
||||
command-line flags:
|
||||
-apiURL string
|
||||
vmbackupmanager address to perform API requests (default "http://127.0.0.1:8300")
|
||||
-concurrency int
|
||||
The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
|
||||
-configFilePath string
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-configProfile string
|
||||
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used
|
||||
-credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-deleteAllObjectVersions
|
||||
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
|
||||
-disableDaily
|
||||
Disable daily run. Default false
|
||||
-disableHourly
|
||||
Disable hourly run. Default false
|
||||
-disableMonthly
|
||||
Disable monthly run. Default false
|
||||
-disableWeekly
|
||||
Disable weekly run. Default false
|
||||
-dst string
|
||||
The root folder of Victoria Metrics backups. Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8300")
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-keepLastDaily int
|
||||
Keep last N daily backups. If 0 is specified next retention cycle removes all backups for given time period. (default -1)
|
||||
-keepLastHourly int
|
||||
Keep last N hourly backups. If 0 is specified next retention cycle removes all backups for given time period. (default -1)
|
||||
-keepLastMonthly int
|
||||
Keep last N monthly backups. If 0 is specified next retention cycle removes all backups for given time period. (default -1)
|
||||
-keepLastWeekly int
|
||||
Keep last N weekly backups. If 0 is specified next retention cycle removes all backups for given time period. (default -1)
|
||||
-license string
|
||||
License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxBytesPerSecond int
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-runOnStart
|
||||
Upload backups immediately after start of the service. Otherwise the backup starts on new hour
|
||||
-s3ForcePathStyle
|
||||
Prefixing endpoint with bucket name when set false, true by default. (default true)
|
||||
-s3StorageClass string
|
||||
The Storage Class applied to objects uploaded to AWS S3. Supported values are: GLACIER, DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, REDUCED_REDUNDANCY, STANDARD, STANDARD_IA.
|
||||
See https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html
|
||||
-snapshot.createURL string
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-storageDataPath string
|
||||
Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
vmbackupmanager docs can be edited at [docs/vmbackupmanager.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmbackupmanager.md).
|
||||
|
||||
|
Before Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 99 KiB |
|
Before Width: | Height: | Size: 64 KiB |
1109
app/vmctl/README.md
@@ -15,7 +15,6 @@ func TestRetry_Do(t *testing.T) {
|
||||
backoffFactor float64
|
||||
backoffMinDuration time.Duration
|
||||
retryableFunc retryableFunc
|
||||
ctx context.Context
|
||||
cancelTimeout time.Duration
|
||||
want uint64
|
||||
wantErr bool
|
||||
@@ -25,7 +24,6 @@ func TestRetry_Do(t *testing.T) {
|
||||
retryableFunc: func() error {
|
||||
return ErrBadRequest
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
@@ -35,7 +33,6 @@ func TestRetry_Do(t *testing.T) {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
@@ -58,7 +55,6 @@ func TestRetry_Do(t *testing.T) {
|
||||
}
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 1,
|
||||
wantErr: false,
|
||||
},
|
||||
@@ -75,7 +71,6 @@ func TestRetry_Do(t *testing.T) {
|
||||
}
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 5,
|
||||
wantErr: true,
|
||||
},
|
||||
@@ -85,14 +80,8 @@ func TestRetry_Do(t *testing.T) {
|
||||
backoffFactor: 1.7,
|
||||
backoffMinDuration: time.Millisecond * 10,
|
||||
retryableFunc: func() error {
|
||||
t := time.NewTicker(time.Millisecond * 5)
|
||||
defer t.Stop()
|
||||
for range t.C {
|
||||
return fmt.Errorf("got some error")
|
||||
}
|
||||
return nil
|
||||
return fmt.Errorf("got some error")
|
||||
},
|
||||
ctx: context.Background(),
|
||||
cancelTimeout: time.Millisecond * 40,
|
||||
want: 3,
|
||||
wantErr: true,
|
||||
@@ -101,12 +90,13 @@ func TestRetry_Do(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r := &Backoff{retries: tt.backoffRetries, factor: tt.backoffFactor, minDuration: tt.backoffMinDuration}
|
||||
ctx := context.Background()
|
||||
if tt.cancelTimeout != 0 {
|
||||
newCtx, cancelFn := context.WithTimeout(tt.ctx, tt.cancelTimeout)
|
||||
tt.ctx = newCtx
|
||||
newCtx, cancelFn := context.WithTimeout(context.Background(), tt.cancelTimeout)
|
||||
ctx = newCtx
|
||||
defer cancelFn()
|
||||
}
|
||||
got, err := r.Retry(tt.ctx, tt.retryableFunc)
|
||||
got, err := r.Retry(ctx, tt.retryableFunc)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("Retry() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
|
||||
@@ -320,26 +320,29 @@ var (
|
||||
)
|
||||
|
||||
const (
|
||||
vmNativeFilterMatch = "vm-native-filter-match"
|
||||
vmNativeFilterTimeStart = "vm-native-filter-time-start"
|
||||
vmNativeFilterTimeEnd = "vm-native-filter-time-end"
|
||||
vmNativeStepInterval = "vm-native-step-interval"
|
||||
vmNativeFilterMatch = "vm-native-filter-match"
|
||||
vmNativeFilterTimeStart = "vm-native-filter-time-start"
|
||||
vmNativeFilterTimeEnd = "vm-native-filter-time-end"
|
||||
vmNativeFilterTimeReverse = "vm-native-filter-time-reverse"
|
||||
vmNativeStepInterval = "vm-native-step-interval"
|
||||
|
||||
vmNativeDisableBinaryProtocol = "vm-native-disable-binary-protocol"
|
||||
vmNativeDisableHTTPKeepAlive = "vm-native-disable-http-keep-alive"
|
||||
vmNativeDisableRetries = "vm-native-disable-retries"
|
||||
vmNativeDisableBinaryProtocol = "vm-native-disable-binary-protocol"
|
||||
vmNativeDisableHTTPKeepAlive = "vm-native-disable-http-keep-alive"
|
||||
vmNativeDisablePerMetricMigration = "vm-native-disable-per-metric-migration"
|
||||
|
||||
vmNativeSrcAddr = "vm-native-src-addr"
|
||||
vmNativeSrcUser = "vm-native-src-user"
|
||||
vmNativeSrcPassword = "vm-native-src-password"
|
||||
vmNativeSrcHeaders = "vm-native-src-headers"
|
||||
vmNativeSrcBearerToken = "vm-native-src-bearer-token"
|
||||
vmNativeSrcAddr = "vm-native-src-addr"
|
||||
vmNativeSrcUser = "vm-native-src-user"
|
||||
vmNativeSrcPassword = "vm-native-src-password"
|
||||
vmNativeSrcHeaders = "vm-native-src-headers"
|
||||
vmNativeSrcBearerToken = "vm-native-src-bearer-token"
|
||||
vmNativeSrcInsecureSkipVerify = "vm-native-src-insecure-skip-verify"
|
||||
|
||||
vmNativeDstAddr = "vm-native-dst-addr"
|
||||
vmNativeDstUser = "vm-native-dst-user"
|
||||
vmNativeDstPassword = "vm-native-dst-password"
|
||||
vmNativeDstHeaders = "vm-native-dst-headers"
|
||||
vmNativeDstBearerToken = "vm-native-dst-bearer-token"
|
||||
vmNativeDstAddr = "vm-native-dst-addr"
|
||||
vmNativeDstUser = "vm-native-dst-user"
|
||||
vmNativeDstPassword = "vm-native-dst-password"
|
||||
vmNativeDstHeaders = "vm-native-dst-headers"
|
||||
vmNativeDstBearerToken = "vm-native-dst-bearer-token"
|
||||
vmNativeDstInsecureSkipVerify = "vm-native-dst-insecure-skip-verify"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -362,10 +365,15 @@ var (
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeStepInterval,
|
||||
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s','%s','%s'.", vmNativeFilterTimeStart,
|
||||
stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
|
||||
Usage: fmt.Sprintf("The time interval to split the migration into steps. For example, to migrate 1y of data with '--%s=month' vmctl will execute it in 12 separate requests from the beginning of the time range to its end. To reverse the order use '--%s'. Requires setting '--%s'. Valid values are '%s','%s','%s','%s','%s'.",
|
||||
vmNativeStepInterval, vmNativeFilterTimeReverse, vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
|
||||
Value: stepper.StepMonth,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeFilterTimeReverse,
|
||||
Usage: fmt.Sprintf("Whether to reverse the order of time intervals split by '--%s' cmd-line flag. When set, the migration will start from the newest to the oldest data.", vmNativeStepInterval),
|
||||
Value: false,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDisableHTTPKeepAlive,
|
||||
Usage: "Disable HTTP persistent connections for requests made to VictoriaMetrics components during export",
|
||||
@@ -448,8 +456,8 @@ var (
|
||||
Value: 2,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDisableRetries,
|
||||
Usage: "Defines whether to disable retries with backoff policy for migration process",
|
||||
Name: vmNativeDisablePerMetricMigration,
|
||||
Usage: "Defines whether to disable per-metric migration and migrate all data via one connection. In this mode, vmctl makes less export/import requests, but can't provide a progress bar or retry failed requests.",
|
||||
Value: false,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
@@ -460,6 +468,16 @@ var (
|
||||
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
|
||||
Value: false,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeSrcInsecureSkipVerify,
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to the source address",
|
||||
Value: false,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDstInsecureSkipVerify,
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to the destination address",
|
||||
Value: false,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -469,6 +487,7 @@ const (
|
||||
remoteReadConcurrency = "remote-read-concurrency"
|
||||
remoteReadFilterTimeStart = "remote-read-filter-time-start"
|
||||
remoteReadFilterTimeEnd = "remote-read-filter-time-end"
|
||||
remoteReadFilterTimeReverse = "remote-read-filter-time-reverse"
|
||||
remoteReadFilterLabel = "remote-read-filter-label"
|
||||
remoteReadFilterLabelValue = "remote-read-filter-label-value"
|
||||
remoteReadStepInterval = "remote-read-step-interval"
|
||||
@@ -520,9 +539,14 @@ var (
|
||||
Value: false,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadStepInterval,
|
||||
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are %q,%q,%q,%q.", remoteReadFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
|
||||
Required: true,
|
||||
Name: remoteReadStepInterval,
|
||||
Usage: fmt.Sprintf("The time interval to split the migration into steps. For example, to migrate 1y of data with '--%s=month' vmctl will execute it in 12 separate requests from the beginning of the time range to its end. To reverse the order use '--%s'. Requires setting '--%s'. Valid values are '%s','%s','%s','%s','%s'.",
|
||||
remoteReadStepInterval, remoteReadFilterTimeReverse, remoteReadFilterTimeStart, stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute), Required: true,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: remoteReadFilterTimeReverse,
|
||||
Usage: fmt.Sprintf("Whether to reverse the order of time intervals split by '--%s' cmd-line flag. When set, the migration will start from the newest to the oldest data.", remoteReadStepInterval),
|
||||
Value: false,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadSrcAddr,
|
||||
|
||||
@@ -358,10 +358,13 @@ func (c *Client) getSeries() ([]*Series, error) {
|
||||
func (c *Client) do(q influx.Query) ([]queryValues, error) {
|
||||
res, err := c.Query(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query %q err: %s", q.Command, err)
|
||||
return nil, fmt.Errorf("query error: %s", err)
|
||||
}
|
||||
if res.Error() != nil {
|
||||
return nil, fmt.Errorf("response error: %s", res.Error())
|
||||
}
|
||||
if len(res.Results) < 1 {
|
||||
return nil, fmt.Errorf("exploration query %q returned 0 results", q.Command)
|
||||
return nil, fmt.Errorf("query returned 0 results")
|
||||
}
|
||||
return parseResult(res.Results[0])
|
||||
}
|
||||
|
||||