Compare commits
529 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
edd1590ac7 | ||
|
|
3f0bcbe067 | ||
|
|
3eadee6cb7 | ||
|
|
f1a22b097a | ||
|
|
544821b719 | ||
|
|
d3fa0ccabd | ||
|
|
cb12a8f0a8 | ||
|
|
5a0938d807 | ||
|
|
1177dca3da | ||
|
|
75f309fbbf | ||
|
|
c6a8ebb11f | ||
|
|
df32d2836c | ||
|
|
59f9960992 | ||
|
|
3ec6639bbb | ||
|
|
7d23598b33 | ||
|
|
78d35d4f46 | ||
|
|
5f593b0ed3 | ||
|
|
4a0d06d1db | ||
|
|
b59164cf33 | ||
|
|
b46194472f | ||
|
|
a51d0ec6ec | ||
|
|
95dbebf512 | ||
|
|
f010d773d6 | ||
|
|
2c25b0322b | ||
|
|
a5c5b54c22 | ||
|
|
6742839fd6 | ||
|
|
9ff3ecb991 | ||
|
|
9a97941e2a | ||
|
|
fc2240fb22 | ||
|
|
a4c6a3b3e1 | ||
|
|
500e625e8c | ||
|
|
5153410ced | ||
|
|
4c56b1a6dd | ||
|
|
7a0b964e8d | ||
|
|
6f3080f9fb | ||
|
|
e1d1708fa2 | ||
|
|
43f9842b6f | ||
|
|
2256b79a89 | ||
|
|
b8d9d6c326 | ||
|
|
22949911e9 | ||
|
|
3055ab0115 | ||
|
|
25e19c75c7 | ||
|
|
3c1b39c978 | ||
|
|
0db901617d | ||
|
|
569e58dcdf | ||
|
|
b1d0028e79 | ||
|
|
b88feb631e | ||
|
|
df148f48b7 | ||
|
|
7f9c68cdcb | ||
|
|
d1dcbfd0f9 | ||
|
|
c79e4a2f90 | ||
|
|
5b08e6fb16 | ||
|
|
12e4785fe8 | ||
|
|
3967bd705a | ||
|
|
fdb8995642 | ||
|
|
9d237408c6 | ||
|
|
759c938870 | ||
|
|
dc9eafcd02 | ||
|
|
e1f699bb6c | ||
|
|
db963205cc | ||
|
|
48275d8c12 | ||
|
|
f888d194da | ||
|
|
33622c409c | ||
|
|
3be0e6b087 | ||
|
|
e7fdea5953 | ||
|
|
2af39a96c5 | ||
|
|
2d3082bb55 | ||
|
|
0fe8f11090 | ||
|
|
d58d5562f1 | ||
|
|
7962cf1af8 | ||
|
|
65c60cf413 | ||
|
|
75991277fa | ||
|
|
1db1a29ffa | ||
|
|
947b37ba8e | ||
|
|
e6fd1f7875 | ||
|
|
5c7c0b2bda | ||
|
|
5f4a9f782f | ||
|
|
602a3d99ae | ||
|
|
cfdb6762e6 | ||
|
|
b1e49bab52 | ||
|
|
b75c2ce659 | ||
|
|
2601cc0fb0 | ||
|
|
0c403bfd29 | ||
|
|
78188decf9 | ||
|
|
c54cb3e63c | ||
|
|
8fc8ef1aba | ||
|
|
1b7dc1e5a5 | ||
|
|
f39c84b21f | ||
|
|
9761ffd161 | ||
|
|
aa81039b42 | ||
|
|
50f790b5d7 | ||
|
|
136fc6217c | ||
|
|
5ec9e49103 | ||
|
|
88f6286df7 | ||
|
|
f6529f932a | ||
|
|
2065d11300 | ||
|
|
35fb9bdee1 | ||
|
|
a647144616 | ||
|
|
c3c3e51f17 | ||
|
|
0b2a66db30 | ||
|
|
6e855d4b82 | ||
|
|
d4aadba9fa | ||
|
|
edc0a94a3c | ||
|
|
3a3d2165f9 | ||
|
|
9c566f7db9 | ||
|
|
29f9ef9b7f | ||
|
|
331a6a2015 | ||
|
|
b521d1d4f2 | ||
|
|
3cfb3a3683 | ||
|
|
8e2afdf568 | ||
|
|
e17eb35147 | ||
|
|
65a61ff118 | ||
|
|
71b72304ae | ||
|
|
44a6cc5eca | ||
|
|
910092ca4d | ||
|
|
cef010d5f7 | ||
|
|
648d11b8e0 | ||
|
|
fb83e97170 | ||
|
|
b0c956a178 | ||
|
|
529d7be26b | ||
|
|
726f6ad804 | ||
|
|
6cee5338b2 | ||
|
|
e061a4fa19 | ||
|
|
4fb049bcba | ||
|
|
17d4a6e900 | ||
|
|
904dababcc | ||
|
|
45dabfac1b | ||
|
|
b1713e3fcd | ||
|
|
9666834045 | ||
|
|
20ac89c4e0 | ||
|
|
bd0c6a095e | ||
|
|
7bc728bf53 | ||
|
|
828669e4e1 | ||
|
|
ccfb0ae2d3 | ||
|
|
576a80b3d9 | ||
|
|
f104f3eb2a | ||
|
|
6378205415 | ||
|
|
18cfc4be7b | ||
|
|
0ce557951f | ||
|
|
35bb44d317 | ||
|
|
aba955fa16 | ||
|
|
fd86a7dc1d | ||
|
|
264d3432ac | ||
|
|
e36fbfae5b | ||
|
|
f0a4157f89 | ||
|
|
645cf6746c | ||
|
|
031d256810 | ||
|
|
dd7e82c34f | ||
|
|
37323c57c9 | ||
|
|
acf2a82d3c | ||
|
|
85a95bf60c | ||
|
|
7c37e9aea9 | ||
|
|
b1aa8c3d8f | ||
|
|
9c77e34ef9 | ||
|
|
923cdb0552 | ||
|
|
82aab87446 | ||
|
|
fb82c4b9fa | ||
|
|
eb103e1527 | ||
|
|
43504ebd14 | ||
|
|
fb935e6e2c | ||
|
|
3ccf7ea20c | ||
|
|
2dae0a2c47 | ||
|
|
b457739f87 | ||
|
|
6d91842c83 | ||
|
|
c14dafce43 | ||
|
|
7f6f350ee1 | ||
|
|
b88806ecbf | ||
|
|
83edbb7cab | ||
|
|
bf15d6a6a2 | ||
|
|
d409898515 | ||
|
|
7a16e8e3a2 | ||
|
|
2096c6e464 | ||
|
|
7c002023d7 | ||
|
|
43552fa8d3 | ||
|
|
def014eb75 | ||
|
|
ca4d5ce037 | ||
|
|
895d5d1355 | ||
|
|
a6a71ef861 | ||
|
|
fa448806a5 | ||
|
|
f3d712724c | ||
|
|
f669531506 | ||
|
|
133b288681 | ||
|
|
dc0cb54d41 | ||
|
|
c0ac740f93 | ||
|
|
bebcb8130c | ||
|
|
971e3d83f7 | ||
|
|
238fe7d4e8 | ||
|
|
e772d1c920 | ||
|
|
bc5c4add89 | ||
|
|
e02265cfa7 | ||
|
|
314f28fb38 | ||
|
|
45ede6ba98 | ||
|
|
09dc49e942 | ||
|
|
96dbe9bcbd | ||
|
|
d212f35ae8 | ||
|
|
48fb0d1c4b | ||
|
|
2728b25783 | ||
|
|
787242d7b0 | ||
|
|
36fd007247 | ||
|
|
ad34f42467 | ||
|
|
3fd8653b40 | ||
|
|
4b18a4f026 | ||
|
|
e44532b760 | ||
|
|
fe8b12fbad | ||
|
|
433fff0006 | ||
|
|
534944b671 | ||
|
|
f6878eac36 | ||
|
|
364fdf4a56 | ||
|
|
14a399dd06 | ||
|
|
345980f78f | ||
|
|
18fe0ff14b | ||
|
|
ab4f090c63 | ||
|
|
1187ee5e16 | ||
|
|
47ac2051bb | ||
|
|
17b87725ed | ||
|
|
c9a25e931b | ||
|
|
536d59914a | ||
|
|
68b8c48c86 | ||
|
|
444fca89f8 | ||
|
|
a14053ffa0 | ||
|
|
423cd981fb | ||
|
|
d962cdbc13 | ||
|
|
9a48c1b53d | ||
|
|
201b685b13 | ||
|
|
90d6e94e5b | ||
|
|
f391e5a3a0 | ||
|
|
3a68b94487 | ||
|
|
467cb9e3d1 | ||
|
|
621bf03745 | ||
|
|
4c3ef78c05 | ||
|
|
e09a245b2b | ||
|
|
e154f4a644 | ||
|
|
7906316741 | ||
|
|
f4f8f21875 | ||
|
|
1252ca44d8 | ||
|
|
d741794fab | ||
|
|
03dceb700d | ||
|
|
4de4da1e2a | ||
|
|
062211c61c | ||
|
|
d1d34664b5 | ||
|
|
a939667ce0 | ||
|
|
6a7ef768ff | ||
|
|
22b1941cfc | ||
|
|
f6114345de | ||
|
|
937f382938 | ||
|
|
019d8e88d8 | ||
|
|
baefe5a8ad | ||
|
|
2c43e846a9 | ||
|
|
d6a41b6ea2 | ||
|
|
1a3689af9a | ||
|
|
62ebf5c88e | ||
|
|
e32ad9e923 | ||
|
|
3792ea4065 | ||
|
|
f5d77a7081 | ||
|
|
e84153d5ca | ||
|
|
4e3cfe8461 | ||
|
|
732e729ef9 | ||
|
|
369f01c738 | ||
|
|
7f15cd7161 | ||
|
|
cb943f35c7 | ||
|
|
530e9904af | ||
|
|
8d021b73b5 | ||
|
|
2b53add6b2 | ||
|
|
1da1d502a8 | ||
|
|
9fc7726d84 | ||
|
|
e5ca8ac0db | ||
|
|
51bf577431 | ||
|
|
e02d1ef93c | ||
|
|
b21b110b7a | ||
|
|
c459600346 | ||
|
|
59a31171e3 | ||
|
|
68a0f5ce12 | ||
|
|
b523e0369c | ||
|
|
241ffd1f3b | ||
|
|
4281c5ed14 | ||
|
|
05fb08713c | ||
|
|
03903c1176 | ||
|
|
af8b7e8391 | ||
|
|
317b0cbed2 | ||
|
|
4f8a72806a | ||
|
|
422b31de40 | ||
|
|
7cc3d96a41 | ||
|
|
f3a03c4164 | ||
|
|
975dac9086 | ||
|
|
186c078fac | ||
|
|
a78948ae8b | ||
|
|
8683ea85e6 | ||
|
|
9fa2632ac3 | ||
|
|
d86e9b49c4 | ||
|
|
ed8441ec52 | ||
|
|
815666e6a6 | ||
|
|
19712fc2bd | ||
|
|
c8f2f9b2e8 | ||
|
|
9b2246c29b | ||
|
|
a93e644001 | ||
|
|
f7b242540b | ||
|
|
438428b5b0 | ||
|
|
d5f21f3f4b | ||
|
|
f7049e2af7 | ||
|
|
98854e5f2b | ||
|
|
2cd23362f5 | ||
|
|
f050e3f492 | ||
|
|
f4135b0d14 | ||
|
|
2c44178645 | ||
|
|
15d61c4879 | ||
|
|
fa03e0d210 | ||
|
|
2549469d5d | ||
|
|
d136081040 | ||
|
|
dd1e53b119 | ||
|
|
ff5bbc4b88 | ||
|
|
901e12024d | ||
|
|
636c55b526 | ||
|
|
388cdb1980 | ||
|
|
48656dcc38 | ||
|
|
cb311bb156 | ||
|
|
2cfb376945 | ||
|
|
c2678754e4 | ||
|
|
49e36e8d9d | ||
|
|
f26162ec99 | ||
|
|
9c70c1f21f | ||
|
|
5e341ccb59 | ||
|
|
f9084611bd | ||
|
|
a537c4f602 | ||
|
|
ae1238fe5c | ||
|
|
03ebc028f7 | ||
|
|
a7697cc88b | ||
|
|
e540c02014 | ||
|
|
711f8a5b8d | ||
|
|
f95dd67a22 | ||
|
|
d7de4807e1 | ||
|
|
edcdc39eb3 | ||
|
|
1d1ba889fe | ||
|
|
4b110fa21c | ||
|
|
efe1e0cff0 | ||
|
|
ce99b48a9a | ||
|
|
939d5ffc2b | ||
|
|
faad6f84a4 | ||
|
|
d4849561ef | ||
|
|
33806264ec | ||
|
|
63fc140624 | ||
|
|
74424b55ee | ||
|
|
4edfe76bef | ||
|
|
442fcfec5a | ||
|
|
4a07820048 | ||
|
|
9ca7d76b25 | ||
|
|
bea2f86b7b | ||
|
|
1a9a6b560f | ||
|
|
1256931aee | ||
|
|
7f2a6c7b54 | ||
|
|
d61f7b7279 | ||
|
|
458c89324d | ||
|
|
2824856691 | ||
|
|
83a1a889ec | ||
|
|
c4756f94da | ||
|
|
5a401225c7 | ||
|
|
1bf6cd814d | ||
|
|
8ec45ff335 | ||
|
|
b861a64510 | ||
|
|
7faa762021 | ||
|
|
ca191696fe | ||
|
|
ecf132933e | ||
|
|
4e39bf148c | ||
|
|
9f5ac603a7 | ||
|
|
2e30202dc7 | ||
|
|
38d7e96602 | ||
|
|
01d82cbf21 | ||
|
|
74963f71c6 | ||
|
|
71c417427c | ||
|
|
c727d2219b | ||
|
|
73c95c4e5b | ||
|
|
80dc74dbc1 | ||
|
|
802fabf0d7 | ||
|
|
f9902b3372 | ||
|
|
baa36354e0 | ||
|
|
5219c0f474 | ||
|
|
acdb401585 | ||
|
|
1e38ad6d20 | ||
|
|
2d33230793 | ||
|
|
7a3a9421f3 | ||
|
|
04faea8b45 | ||
|
|
0e26b7168a | ||
|
|
b51c23dc5b | ||
|
|
c7ee2fabb8 | ||
|
|
57cac289e0 | ||
|
|
5d5f0b0627 | ||
|
|
cdecf83ce5 | ||
|
|
553016ea99 | ||
|
|
fcb7655d1e | ||
|
|
c7dccebaef | ||
|
|
6b4e6c229c | ||
|
|
31f6b9c977 | ||
|
|
0a69122d81 | ||
|
|
d56390b925 | ||
|
|
fda61e8e96 | ||
|
|
3b146a9976 | ||
|
|
947333bfa2 | ||
|
|
a4140de9e6 | ||
|
|
cb96a1865b | ||
|
|
4dca03501b | ||
|
|
c5770600a2 | ||
|
|
3f689561d5 | ||
|
|
b9bf3cbe3e | ||
|
|
c2f2e5c0a0 | ||
|
|
df0cda3ab9 | ||
|
|
2242647a04 | ||
|
|
8f28a578d3 | ||
|
|
9b9cb04511 | ||
|
|
803a00102a | ||
|
|
4d43ab0875 | ||
|
|
83d3e582ab | ||
|
|
b44edc7832 | ||
|
|
9fb38569eb | ||
|
|
48c8c5093b | ||
|
|
814455138a | ||
|
|
2ff038d841 | ||
|
|
8001d4ccc9 | ||
|
|
2baf98082b | ||
|
|
a2344ef4b7 | ||
|
|
8dc6095749 | ||
|
|
ea00dac35f | ||
|
|
35242831f5 | ||
|
|
8629fd8a72 | ||
|
|
d16f22f3a1 | ||
|
|
a5a1b9bd66 | ||
|
|
6123aa3e75 | ||
|
|
9d41c06db1 | ||
|
|
4abd71402e | ||
|
|
c7e03f30d8 | ||
|
|
b739ff194b | ||
|
|
8c568b13b2 | ||
|
|
7388479a07 | ||
|
|
157c02622b | ||
|
|
4068f8d590 | ||
|
|
bd11fd8f1d | ||
|
|
b577cdd855 | ||
|
|
b39d5ef656 | ||
|
|
8164cd8932 | ||
|
|
b43b498fd8 | ||
|
|
5d87dbfd65 | ||
|
|
195341a7cf | ||
|
|
f0087f0dbb | ||
|
|
a4ae945a79 | ||
|
|
b2aa80e74b | ||
|
|
29a7067827 | ||
|
|
d5c180e680 | ||
|
|
2a7b1cc668 | ||
|
|
929f09b90d | ||
|
|
d6347a3e56 | ||
|
|
fc5b26d856 | ||
|
|
de3c662e8a | ||
|
|
3149ac7a7e | ||
|
|
419ad74269 | ||
|
|
3fe848cdd7 | ||
|
|
5481906db6 | ||
|
|
cc3e69e963 | ||
|
|
8cea3c3cc4 | ||
|
|
c164a8d231 | ||
|
|
3caac3d12c | ||
|
|
054fe1c198 | ||
|
|
0a45220b0a | ||
|
|
8749c2dd92 | ||
|
|
011c5da785 | ||
|
|
fcbefc15d0 | ||
|
|
485d43ef21 | ||
|
|
b638c1eed5 | ||
|
|
cc379f95c2 | ||
|
|
689d769b4d | ||
|
|
0d03855787 | ||
|
|
75f7c51cab | ||
|
|
184a659c5f | ||
|
|
7ce87ebcb2 | ||
|
|
1051d8aa2d | ||
|
|
689cf88eb2 | ||
|
|
bdd0a1cdb2 | ||
|
|
acf1a2c72b | ||
|
|
89315d719d | ||
|
|
dc9d7aedd5 | ||
|
|
7373986f9e | ||
|
|
7bf5d48315 | ||
|
|
3e451ccdda | ||
|
|
fe3444b124 | ||
|
|
77be066ee8 | ||
|
|
1837f2f7d3 | ||
|
|
f5d52b51f1 | ||
|
|
31ec79eaf6 | ||
|
|
c8ea697db8 | ||
|
|
2140ccbdcc | ||
|
|
7976c22797 | ||
|
|
2c44f9989a | ||
|
|
e61e3bf174 | ||
|
|
89611fa48c | ||
|
|
14f0f90507 | ||
|
|
24ffad74c1 | ||
|
|
6740294ebb | ||
|
|
2e2e4f7e21 | ||
|
|
9dcb18e03d | ||
|
|
0477991b4d | ||
|
|
b1f9b39c4b | ||
|
|
39b11b3ff4 | ||
|
|
7bd420cbfe | ||
|
|
85962b459f | ||
|
|
f6ca776c75 | ||
|
|
70df5f4975 | ||
|
|
c86286ec1d | ||
|
|
261535b32d | ||
|
|
4b7105a65b | ||
|
|
df0309eae0 | ||
|
|
ad4e6a9283 | ||
|
|
59183f66d0 | ||
|
|
fb338c50a3 | ||
|
|
86630350bf | ||
|
|
490c69c64e | ||
|
|
932e53522d | ||
|
|
1de15ad490 | ||
|
|
1f2944a9d0 | ||
|
|
cab7e936a3 | ||
|
|
0326638c90 | ||
|
|
4eb520a342 | ||
|
|
b21e16ad0c | ||
|
|
820669da69 | ||
|
|
8dd03ecf19 | ||
|
|
9e4ed5e591 | ||
|
|
9df60518bb | ||
|
|
c270f8f3e6 | ||
|
|
46dba00756 | ||
|
|
de89bcddae | ||
|
|
0f99c1afb1 | ||
|
|
750daa04d1 | ||
|
|
e4f856e900 |
6
.github/workflows/main.yml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.15
|
||||
go-version: 1.16
|
||||
id: go
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -45,16 +45,20 @@ jobs:
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmalert
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmbackup
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmrestore
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmctl
|
||||
GOOS=openbsd go build -mod=vendor ./app/victoria-metrics
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmagent
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmalert
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmbackup
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmrestore
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmctl
|
||||
GOOS=darwin go build -mod=vendor ./app/victoria-metrics
|
||||
GOOS=darwin go build -mod=vendor ./app/vmagent
|
||||
GOOS=darwin go build -mod=vendor ./app/vmalert
|
||||
GOOS=darwin go build -mod=vendor ./app/vmbackup
|
||||
GOOS=darwin go build -mod=vendor ./app/vmrestore
|
||||
GOOS=darwin go build -mod=vendor ./app/vmctl
|
||||
CGO_ENABLED=0 GOOS=windows go build -mod=vendor ./app/vmagent
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v1.0.6
|
||||
with:
|
||||
|
||||
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2020 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2021 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
162
Makefile
@@ -18,7 +18,8 @@ all: \
|
||||
vmalert-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
vmrestore-prod \
|
||||
vmctl-prod
|
||||
|
||||
include app/*/Makefile
|
||||
include deployment/*/Makefile
|
||||
@@ -32,7 +33,8 @@ publish: \
|
||||
publish-vmalert \
|
||||
publish-vmauth \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore
|
||||
publish-vmrestore \
|
||||
publish-vmctl
|
||||
|
||||
package: \
|
||||
package-victoria-metrics \
|
||||
@@ -40,31 +42,136 @@ package: \
|
||||
package-vmalert \
|
||||
package-vmauth \
|
||||
package-vmbackup \
|
||||
package-vmrestore
|
||||
package-vmrestore \
|
||||
package-vmctl
|
||||
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmauth \
|
||||
vmbackup \
|
||||
vmrestore
|
||||
vmrestore \
|
||||
vmctl
|
||||
|
||||
vmutils-pure: \
|
||||
vmagent-pure \
|
||||
vmalert-pure \
|
||||
vmauth-pure \
|
||||
vmbackup-pure \
|
||||
vmrestore-pure \
|
||||
vmctl-pure
|
||||
|
||||
vmutils-arm64: \
|
||||
vmagent-arm64 \
|
||||
vmalert-arm64 \
|
||||
vmauth-arm64 \
|
||||
vmbackup-arm64 \
|
||||
vmrestore-arm64 \
|
||||
vmctl-arm64
|
||||
|
||||
vmutils-arm: \
|
||||
vmagent-arm \
|
||||
vmalert-arm \
|
||||
vmauth-arm \
|
||||
vmbackup-arm \
|
||||
vmrestore-arm \
|
||||
vmctl-arm
|
||||
|
||||
vmutils-windows-amd64: \
|
||||
vmagent-windows-amd64 \
|
||||
vmalert-windows-amd64 \
|
||||
vmauth-windows-amd64 \
|
||||
vmctl-windows-amd64
|
||||
|
||||
release-snap:
|
||||
snapcraft
|
||||
snapcraft upload "victoriametrics_$(PKG_TAG)_multi.snap" --release beta,edge,candidate
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: victoria-metrics-prod
|
||||
cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz victoria-metrics-prod && \
|
||||
sha256sum victoria-metrics-$(PKG_TAG).tar.gz > victoria-metrics-$(PKG_TAG)_checksums.txt
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-amd64 \
|
||||
release-victoria-metrics-arm \
|
||||
release-victoria-metrics-arm64
|
||||
|
||||
release-victoria-metrics-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-victoria-metrics-generic
|
||||
|
||||
release-victoria-metrics-arm:
|
||||
GOARCH=arm $(MAKE) release-victoria-metrics-generic
|
||||
|
||||
release-victoria-metrics-arm64:
|
||||
GOARCH=arm64 $(MAKE) release-victoria-metrics-generic
|
||||
|
||||
release-victoria-metrics-generic: victoria-metrics-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOARCH)||" -czf victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
victoria-metrics-$(GOARCH)-prod \
|
||||
&& sha256sum victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
victoria-metrics-$(GOARCH)-prod \
|
||||
| sed s/-$(GOARCH)-prod/-prod/ > victoria-metrics-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
|
||||
release-vmutils: \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmauth-prod vmbackup-prod vmrestore-prod && \
|
||||
sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
|
||||
release-vmutils-amd64 \
|
||||
release-vmutils-arm64 \
|
||||
release-vmutils-arm \
|
||||
release-vmutils-windows-amd64
|
||||
|
||||
release-vmutils-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-vmutils-generic
|
||||
|
||||
release-vmutils-arm64:
|
||||
GOARCH=arm64 $(MAKE) release-vmutils-generic
|
||||
|
||||
release-vmutils-arm:
|
||||
GOARCH=arm $(MAKE) release-vmutils-generic
|
||||
|
||||
release-vmutils-windows-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-vmutils-windows-generic
|
||||
|
||||
release-vmutils-generic: \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOARCH)||" -czf vmutils-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod \
|
||||
&& sha256sum vmutils-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod \
|
||||
| sed s/-$(GOARCH)-prod/-prod/ > vmutils-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
|
||||
release-vmutils-windows-generic: \
|
||||
vmagent-windows-$(GOARCH)-prod \
|
||||
vmalert-windows-$(GOARCH)-prod \
|
||||
vmauth-windows-$(GOARCH)-prod \
|
||||
vmctl-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
@@ -94,6 +201,7 @@ errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmauth/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmctl/...
|
||||
|
||||
install-errcheck:
|
||||
which errcheck || go install github.com/kisielk/errcheck
|
||||
@@ -138,6 +246,9 @@ app-local-pure:
|
||||
app-local-with-goarch:
|
||||
GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-windows-with-goarch:
|
||||
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc
|
||||
|
||||
@@ -151,10 +262,21 @@ golangci-lint: install-golangci-lint
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.29.0
|
||||
|
||||
copy-docs:
|
||||
echo "---\nsort: ${ORDER}\n---\n" > ${DST}
|
||||
cat ${SRC} >> ${DST}
|
||||
|
||||
# Copies docs for all components and adds the order tag.
|
||||
# Cluster docs are supposed to be ordered as 9th.
|
||||
# The rest of the docs are ordered manually.
|
||||
docs-sync:
|
||||
cp app/vmagent/README.md docs/vmagent.md
|
||||
cp app/vmalert/README.md docs/vmalert.md
|
||||
cp app/vmauth/README.md docs/vmauth.md
|
||||
cp app/vmbackup/README.md docs/vmbackup.md
|
||||
cp app/vmrestore/README.md docs/vmrestore.md
|
||||
cp README.md docs/Single-server-VictoriaMetrics.md
|
||||
SRC=README.md DST=docs/Single-server-VictoriaMetrics.md ORDER=1 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md ORDER=2 $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md ORDER=3 $(MAKE) copy-docs
|
||||
SRC=app/vmauth/README.md DST=docs/vmauth.md ORDER=4 $(MAKE) copy-docs
|
||||
SRC=app/vmbackup/README.md DST=docs/vmbackup.md ORDER=5 $(MAKE) copy-docs
|
||||
SRC=app/vmrestore/README.md DST=docs/vmrestore.md ORDER=6 $(MAKE) copy-docs
|
||||
SRC=app/vmctl/README.md DST=docs/vmctl.md ORDER=7 $(MAKE) copy-docs
|
||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md ORDER=8 $(MAKE) copy-docs
|
||||
|
||||
|
||||
|
||||
440
README.md
@@ -1,3 +1,5 @@
|
||||
# VictoriaMetrics
|
||||
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
[](https://hub.docker.com/r/victoriametrics/victoria-metrics)
|
||||
[](http://slack.victoriametrics.com/)
|
||||
@@ -6,11 +8,9 @@
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
|
||||
[](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
|
||||

|
||||
<img src="logo.png" width="300" alt="Victoria Metrics logo">
|
||||
|
||||
## VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is fast, cost-effective and scalable monitoring solution and time series database.
|
||||
VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database.
|
||||
|
||||
It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), [Snap package](https://snapcraft.io/victoriametrics)
|
||||
@@ -104,6 +104,7 @@ Alphabetically sorted links to case studies:
|
||||
|
||||
* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
|
||||
* [Environment variables](#environment-variables)
|
||||
* [Configuration with snap package](#configuration-with-snap-package)
|
||||
* [Prometheus setup](#prometheus-setup)
|
||||
* [Grafana setup](#grafana-setup)
|
||||
* [How to upgrade VictoriaMetrics](#how-to-upgrade-victoriametrics)
|
||||
@@ -116,6 +117,7 @@ Alphabetically sorted links to case studies:
|
||||
* [Prometheus querying API usage](#prometheus-querying-api-usage)
|
||||
* [Prometheus querying API enhancements](#prometheus-querying-api-enhancements)
|
||||
* [Graphite API usage](#graphite-api-usage)
|
||||
* [Graphite Render API usage](#graphite-render-api-usage)
|
||||
* [Graphite Metrics API usage](#graphite-metrics-api-usage)
|
||||
* [Graphite Tags API usage](#graphite-tags-api-usage)
|
||||
* [How to build from sources](#how-to-build-from-sources)
|
||||
@@ -153,6 +155,7 @@ Alphabetically sorted links to case studies:
|
||||
* [Tuning](#tuning)
|
||||
* [Monitoring](#monitoring)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
* [Data migration](#data-migration)
|
||||
* [Backfilling](#backfilling)
|
||||
* [Data updates](#data-updates)
|
||||
* [Replication](#replication)
|
||||
@@ -168,6 +171,7 @@ Alphabetically sorted links to case studies:
|
||||
* [Font used](#font-used)
|
||||
* [Color Palette](#color-palette)
|
||||
* [We kindly ask](#we-kindly-ask)
|
||||
* [List of command-line flags](#list-of-command-line-flags)
|
||||
|
||||
|
||||
## How to start VictoriaMetrics
|
||||
@@ -180,7 +184,7 @@ The following command-line flags are used the most:
|
||||
* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
|
||||
* `-retentionPeriod` - retention for stored data. Older data is automatically deleted. Default retention is 1 month. See [these docs](#retention) for more details.
|
||||
|
||||
Other flags have good enough default values, so set them only if you really need this. Pass `-help` to see all the available flags with description and default values.
|
||||
Other flags have good enough default values, so set them only if you really need this. Pass `-help` to see [all the available flags with description and default values](#list-of-command-line-flags).
|
||||
|
||||
See how to [ingest data to VictoriaMetrics](#how-to-import-time-series-data), how to [query VictoriaMetrics](#grafana-setup)
|
||||
and how to [handle alerts](#alerting).
|
||||
@@ -197,6 +201,26 @@ Each flag value can be set via environment variables according to these rules:
|
||||
* For repeating flags an alternative syntax can be used by joining the different values into one using `,` char as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
|
||||
* It is possible to set a prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`
|
||||
|
||||
### Configuration with snap package
|
||||
|
||||
|
||||
Command-line flags can be changed with the following command:
|
||||
|
||||
```text
|
||||
echo 'FLAGS="-selfScrapeInterval=10s -search.logSlowQueryDuration=20s"' > $SNAP_DATA/var/snap/victoriametrics/current/extra_flags
|
||||
snap restart victoriametrics
|
||||
```
|
||||
Or add needed command-line flags to the file `$SNAP_DATA/var/snap/victoriametrics/current/extra_flags`.
|
||||
|
||||
Note that you cannot change the value of the `-storageDataPath` flag: for safety, the snap package has limited access to the host system.
|
||||
|
||||
|
||||
Changing scrape configuration is possible with a text editor:
|
||||
```text
|
||||
vi $SNAP_DATA/var/snap/victoriametrics/current/etc/victoriametrics-scrape-config.yaml
|
||||
```
|
||||
After changes have been made, trigger a config re-read with the command `curl 127.0.0.1:8248/-/reload`.
|
||||
|
||||
|
||||
## Prometheus setup
|
||||
|
||||
@@ -388,6 +412,12 @@ The `/api/v1/export` endpoint should return the following response:
|
||||
Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
|
||||
while VictoriaMetrics stores them with *milliseconds* precision.
|
||||
|
||||
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
|
||||
For example, `/write?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
|
||||
Some plugins for Telegraf such as [fluentd](https://github.com/fangli/fluent-plugin-influxdb), [Juniper/open-nti](https://github.com/Juniper/open-nti)
|
||||
or [Juniper/jtimon](https://github.com/Juniper/jtimon) send `SHOW DATABASES` query to `/query` and expect a particular database name in the response.
|
||||
Comma-separated list of expected databases can be passed to VictoriaMetrics via `-influx.databaseNames` command-line flag.
|
||||
|
||||
## How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)
|
||||
|
||||
@@ -425,9 +455,12 @@ The `/api/v1/export` endpoint should return the following response:
|
||||
|
||||
Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read via the following APIs:
|
||||
|
||||
* [Prometheus querying API](#prometheus-querying-api-usage)
|
||||
* Metric names can be explored via [Graphite metrics API](#graphite-metrics-api-usage)
|
||||
* Tags can be explored via [Graphite tags API](#graphite-tags-api-usage)
|
||||
* [Graphite API](#graphite-api-usage)
|
||||
* [Prometheus querying API](#prometheus-querying-api-usage). Graphite metric names may contain special chars such as `-`, which may clash
|
||||
with [MetricsQL operations](https://victoriametrics.github.io/MetricsQL.html). Such metrics can be queried via `{__name__="foo-bar.baz"}`.
|
||||
VictoriaMetrics supports `__graphite__` pseudo-label for selecting time series with Graphite-compatible filters in [MetricsQL](https://victoriametrics.github.io/MetricsQL.html).
|
||||
For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster
|
||||
and it is easier to use when migrating from Graphite to VictoriaMetrics.
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/main/cmd/carbonapi/carbonapi.example.victoriametrics.yaml)
|
||||
|
||||
## How to send data from OpenTSDB-compatible agents
|
||||
@@ -503,6 +536,9 @@ The `/api/v1/export` endpoint should return the following response:
|
||||
{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]}
|
||||
```
|
||||
|
||||
Extra labels may be added to all the imported time series by passing `extra_label=name=value` query args.
|
||||
For example, `/api/put?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
|
||||
|
||||
## Prometheus querying API usage
|
||||
|
||||
@@ -519,36 +555,72 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h
|
||||
* [/api/v1/targets](https://prometheus.io/docs/prometheus/latest/querying/api/#targets) - see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for more details.
|
||||
|
||||
These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
|
||||
All the Prometheus querying API handlers can be prepended with `/prometheus` prefix. For example, both `/prometheus/api/v1/query` and `/api/v1/query` should work.
|
||||
|
||||
|
||||
### Prometheus querying API enhancements
|
||||
|
||||
Additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) VictoriaMetrics accepts relative times in `time`, `start` and `end` query args.
|
||||
VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query arg, which can be used for enforcing additional label filters for queries. For example,
|
||||
`/api/v1/query_range?extra_label=user_id=123&query=<query>` would automatically add `{user_id="123"}` label filter to the given `<query>`. This functionality can be used
|
||||
for limiting the scope of time series visible to the given tenant. It is expected that the `extra_label` query arg is automatically set by auth proxy sitting
|
||||
in front of VictoriaMetrics. [Contact us](mailto:sales@victoriametrics.com) if you need assistance with such a proxy.
|
||||
|
||||
VictoriaMetrics accepts relative times in `time`, `start` and `end` query args additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt).
|
||||
For example, the following query would return data for the last 30 minutes: `/api/v1/query_range?start=-30m&query=...`.
|
||||
|
||||
By default, VictoriaMetrics returns time series for the last 5 minutes from /api/v1/series, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
|
||||
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
|
||||
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
|
||||
|
||||
VictoriaMetrics accepts additional args for `/api/v1/labels` and `/api/v1/label/.../values` handlers.
|
||||
See [this feature request](https://github.com/prometheus/prometheus/issues/6178) for details:
|
||||
|
||||
* Any number of [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) via `match[]` query arg.
|
||||
* Optional `start` and `end` query args for limiting the time range for the selected labels or label values.
|
||||
|
||||
See [this feature request](https://github.com/prometheus/prometheus/issues/6178) for details.
|
||||
|
||||
Additionally VictoriaMetrics provides the following handlers:
|
||||
|
||||
* `/api/v1/series/count` - it returns the total number of time series in the database. Some notes:
|
||||
* `/api/v1/series/count` - returns the total number of time series in the database. Some notes:
|
||||
* the handler scans all the inverted index, so it can be slow if the database contains tens of millions of time series;
|
||||
* the handler may count [deleted time series](#how-to-delete-time-series) additionally to normal time series due to internal implementation restrictions;
|
||||
* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
|
||||
* `/api/v1/status/active_queries` - it returns a list of currently running queries.
|
||||
* `/api/v1/labels/count` - returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
|
||||
* `/api/v1/status/active_queries` - returns a list of currently running queries.
|
||||
* `/api/v1/status/top_queries` - returns the following query lists:
|
||||
* the most frequently executed queries - `topByCount`
|
||||
* queries with the biggest average execution duration - `topByAvgDuration`
|
||||
* queries that took the most time for execution - `topBySumDuration`
|
||||
|
||||
The number of returned queries can be limited via `topN` query arg. Old queries can be filtered out with `maxLifetime` query arg.
|
||||
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
|
||||
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
|
||||
|
||||
|
||||
## Graphite API usage
|
||||
|
||||
VictoriaMetrics supports the following Graphite APIs:
|
||||
VictoriaMetrics supports the following Graphite APIs, which are needed for [Graphite datasource in Grafana](https://grafana.com/docs/grafana/latest/datasources/graphite/):
|
||||
|
||||
* Render API - see [these docs](#graphite-render-api-usage).
|
||||
* Metrics API - see [these docs](#graphite-metrics-api-usage).
|
||||
* Tags API - see [these docs](#graphite-tags-api-usage).
|
||||
|
||||
All the Graphite handlers can be prepended with `/graphite` prefix. For example, both `/graphite/metrics/find` and `/metrics/find` should work.
|
||||
|
||||
VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query arg for all the Graphite APIs. This arg can be used for limiting the scope of time series
|
||||
visible to the given tenant. It is expected that the `extra_label` query arg is automatically set by auth proxy sitting in front of VictoriaMetrics.
|
||||
[Contact us](mailto:sales@victoriametrics.com) if you need assistance with such a proxy.
|
||||
|
||||
VictoriaMetrics supports `__graphite__` pseudo-label for filtering time series with Graphite-compatible filters in [MetricsQL](https://victoriametrics.github.io/MetricsQL.html).
|
||||
For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster
|
||||
and it is easier to use when migrating from Graphite to VictoriaMetrics.
|
||||
|
||||
|
||||
### Graphite Render API usage
|
||||
|
||||
[VictoriaMetrics Enterprise](https://victoriametrics.com/enterprise.html) supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
|
||||
at `/render` endpoint, which is used by [Graphite datasource in Grafana](https://grafana.com/docs/grafana/latest/datasources/graphite/).
|
||||
It supports `Storage-Step` http request header, which must be set to a step between data points stored in VictoriaMetrics when configuring Graphite datasource in Grafana.
|
||||
|
||||
|
||||
### Graphite Metrics API usage
|
||||
|
||||
@@ -587,14 +659,14 @@ to your needs or when testing bugfixes.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make victoria-metrics` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make victoria-metrics` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make victoria-metrics-prod` from the root folder of the repository.
|
||||
2. Run `make victoria-metrics-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### ARM build
|
||||
@@ -603,24 +675,22 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make victoria-metrics-arm` or `make victoria-metrics-arm64` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make victoria-metrics-arm` or `make victoria-metrics-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-arm` or `victoria-metrics-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make victoria-metrics-arm-prod` or `make victoria-metrics-arm64-prod` from the root folder of the repository.
|
||||
2. Run `make victoria-metrics-arm-prod` or `make victoria-metrics-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-arm-prod` or `victoria-metrics-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
### Pure Go build (CGO_ENABLED=0)
|
||||
|
||||
`Pure Go` mode builds only Go code without [cgo](https://golang.org/cmd/cgo/) dependencies.
|
||||
This is an experimental mode, which may result in a lower compression ratio and slower decompression performance.
|
||||
Use it with caution!
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make victoria-metrics-pure` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make victoria-metrics-pure` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
@@ -759,10 +829,7 @@ The exported data can be imported to VictoriaMetrics via [/api/v1/import/native]
|
||||
|
||||
### How to export data in JSON line format
|
||||
|
||||
Consider [exporting data in native format](#how-to-export-data-in-native-format) if big amounts of data must be migrated between VictoriaMetrics instances,
|
||||
since exporting in native format usually consumes lower amounts of CPU and memory resources, while the resulting exported data occupies lower amounts of disk space.
|
||||
|
||||
In order to export data in JSON line format, send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
|
||||
Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
|
||||
where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to export. Use `{__name__!=""}` selector for fetching all the time series.
|
||||
The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
|
||||
@@ -947,6 +1014,7 @@ Note that it could be required to flush response cache after importing historica
|
||||
### How to import data in Prometheus exposition format
|
||||
|
||||
VictoriaMetrics accepts data in [Prometheus exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
|
||||
and in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
|
||||
via `/api/v1/import/prometheus` path. For example, the following line imports a single line in Prometheus exposition format into VictoriaMetrics:
|
||||
|
||||
```bash
|
||||
@@ -1108,8 +1176,8 @@ on the same time series if they fall within the same discrete 60s bucket. The e
|
||||
|
||||
The recommended value for `-dedup.minScrapeInterval` must be equal to `scrape_interval` config from Prometheus configs.
|
||||
|
||||
The de-duplication reduces disk space usage if multiple identically configured Prometheus instances in HA pair
|
||||
write data to the same VictoriaMetrics instance. Note that these Prometheus instances must have identical
|
||||
The de-duplication reduces disk space usage if multiple identically configured [vmagent](https://victoriametrics.github.io/vmagent.html) or Prometheus instances in HA pair
|
||||
write data to the same VictoriaMetrics instance. These vmagent or Prometheus instances must have identical
|
||||
`external_labels` section in their configs, so they write data to the same time series.
|
||||
|
||||
|
||||
@@ -1250,16 +1318,28 @@ The most interesting metrics are:
|
||||
|
||||
VictoriaMetrics also exposes currently running queries with their execution times at `/api/v1/status/active_queries` page.
|
||||
|
||||
See the example of alerting rules for VM components [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts.yml).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
|
||||
of tweaking these flag values arises.
|
||||
|
||||
* It is recommended inspecting logs during troubleshooting, since they may contain useful information.
|
||||
|
||||
* It is recommended upgrading to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
since the encountered issue could be already fixed there.
|
||||
|
||||
* It is recommended inspecting logs during troubleshooting, since they may contain useful information.
|
||||
* It is recommended to have at least 50% of spare resources for CPU, disk IO and RAM, so VictoriaMetrics could handle short spikes in the workload without performance issues.
|
||||
|
||||
* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
It may slow down when there is not enough free space left. So make sure `-storageDataPath` directory
|
||||
has at least 20% of free space. The remaining amount of free space
|
||||
can be [monitored](#monitoring) via `vm_free_disk_space_bytes` metric. The total size of data
|
||||
stored on the disk can be monitored via sum of `vm_data_size_bytes` metrics.
|
||||
See also `vm_merge_need_free_disk_space` metrics, which are set to values higher than 0
|
||||
if background merge cannot be initiated due to free disk space shortage. The value shows the number of per-month partitions,
|
||||
which would start background merge if they had more free disk space.
|
||||
|
||||
* VictoriaMetrics buffers incoming data in memory for up to a few seconds before flushing it to persistent storage.
|
||||
This may lead to the following "issues":
|
||||
@@ -1270,22 +1350,16 @@ VictoriaMetrics also exposes currently running queries with their execution time
|
||||
|
||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||
then it is likely you have too many active time series for the current amount of RAM.
|
||||
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
|
||||
It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics such as `vm_slow_row_inserts_total` and `vm_slow_metric_name_loads_total`, which could be used
|
||||
as an indicator of low amounts of RAM. It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||
ingestion and query performance in this case.
|
||||
|
||||
* If the order of labels for the same metrics can change over time (e.g. if `metric{k1="v1",k2="v2"}` may become `metric{k2="v2",k1="v1"}`),
|
||||
then it is recommended running VictoriaMetrics with `-sortLabels` command-line flag in order to reduce memory usage and CPU usage.
|
||||
|
||||
* VictoriaMetrics prioritizes data ingestion over data querying. So if it doesn't have enough resources for data ingestion,
|
||||
then data querying may slow down significantly.
|
||||
|
||||
* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
It may slow down when there is no enough free space left. So make sure `-storageDataPath` directory
|
||||
has at least 20% of free space comparing to disk size. The remaining amount of free space
|
||||
can be [monitored](#monitoring) via `vm_free_disk_space_bytes` metric. The total size of data
|
||||
stored on the disk can be monitored via sum of `vm_data_size_bytes` metrics.
|
||||
See also `vm_merge_need_free_disk_space` metrics, which are set to values higher than 0
|
||||
if background merge cannot be initiated due to free disk space shortage. The value shows the number of per-month partitions,
|
||||
which would start background merge if they had more free disk space.
|
||||
|
||||
* If VictoriaMetrics doesn't work because certain parts are corrupted due to disk errors,
|
||||
then just remove directories with broken parts. It is safe removing subdirectories under `<-storageDataPath>/data/{big,small}/YYYY_MM` directories
|
||||
when VictoriaMetrics isn't running. This recovers VictoriaMetrics at the cost of losing the data stored in the deleted broken parts.
|
||||
@@ -1307,18 +1381,29 @@ VictoriaMetrics also exposes currently running queries with their execution time
|
||||
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
|
||||
while `topN` equals to 10.
|
||||
|
||||
* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics.
|
||||
|
||||
* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
|
||||
This prevents ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
|
||||
metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
|
||||
|
||||
* If you store Graphite metrics like `foo.bar.baz` in VictoriaMetrics, then `-search.treatDotsAsIsInRegexps` command-line flag could be useful.
|
||||
By default `.` chars in regexps match any char. If you need matching only dots, then the `\\.` must be used in regexp filters.
|
||||
When `-search.treatDotsAsIsInRegexps` option is enabled, then dots in regexps are automatically escaped in order to match only dots instead of arbitrary chars.
|
||||
This may significantly increase performance when locating time series for the given label filters.
|
||||
* If you store Graphite metrics like `foo.bar.baz` in VictoriaMetrics, then use `{__graphite__="foo.*.baz"}` syntax for selecting such metrics.
|
||||
This expression is equivalent to `{__name__=~"foo[.][^.]*[.]baz"}`, but it works faster and it is easier to use when migrating from Graphite.
|
||||
|
||||
* VictoriaMetrics ignores `NaN` values during data ingestion.
|
||||
|
||||
|
||||
## Data migration
|
||||
|
||||
Use [vmctl](https://victoriametrics.github.io/vmctl.html) for data migration. It supports the following data migration types:
|
||||
|
||||
* From Prometheus to VictoriaMetrics
|
||||
* From InfluxDB to VictoriaMetrics
|
||||
* From VictoriaMetrics to VictoriaMetrics
|
||||
|
||||
See [vmctl docs](https://victoriametrics.github.io/vmctl.html) for more details.
|
||||
|
||||
|
||||
## Backfilling
|
||||
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
|
||||
@@ -1339,7 +1424,7 @@ cache when samples with timestamps older than `now - search.cacheTimestampOffset
|
||||
## Data updates
|
||||
|
||||
VictoriaMetrics doesn't support updating already existing sample values to new ones. It stores all the ingested data points
|
||||
for the same time series with identical timestamps. While is possible substituting old time series with new time series via
|
||||
for the same time series with identical timestamps. While it is possible substituting old time series with new time series via
|
||||
[removal of old time series](#how-to-delete-time-series) and then [writing new time series](#backfilling), this approach
|
||||
should be used only for one-off updates. It shouldn't be used for frequent updates because of non-zero overhead related to data removal.
|
||||
|
||||
@@ -1358,7 +1443,7 @@ See also [high availability docs](#high-availability) and [backup docs](#backups
|
||||
|
||||
VictoriaMetrics supports backups via [vmbackup](https://victoriametrics.github.io/vmbackup.html)
|
||||
and [vmrestore](https://victoriametrics.github.io/vmrestore.html) tools.
|
||||
We also provide `vmbackuper` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
|
||||
We also provide `vmbackupmanager` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
|
||||
|
||||
|
||||
## Profiling
|
||||
@@ -1386,14 +1471,14 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
|
||||
|
||||
* [Helm charts for single-node and cluster versions of VictoriaMetrics](https://github.com/VictoriaMetrics/helm-charts).
|
||||
* [Kubernetes operator for VictoriaMetrics](https://github.com/VictoriaMetrics/operator).
|
||||
* [vmctl tool for data migration to VictoriaMetrics](https://github.com/VictoriaMetrics/vmctl).
|
||||
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
|
||||
See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
|
||||
See [this example](https://github.com/go-graphite/carbonapi/blob/main/cmd/carbonapi/carbonapi.example.victoriametrics.yaml).
|
||||
* [Ansible role for installing single-node VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Ansible role for installing cluster VictoriaMetrics](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
|
||||
* [Snap package for VictoriaMetrics](https://snapcraft.io/victoriametrics).
|
||||
* [vmalert-cli](https://github.com/aorfanos/vmalert-cli) - a CLI application for managing [vmalert](https://victoriametrics.github.io/vmalert.html).
|
||||
|
||||
|
||||
## Third-party contributions
|
||||
@@ -1469,3 +1554,256 @@ Files included in each folder:
|
||||
* There should be sufficient clear space around the logo.
|
||||
* Do not change spacing, alignment, or relative locations of the design elements.
|
||||
* Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.
|
||||
|
||||
|
||||
## List of command-line flags
|
||||
|
||||
Pass `-help` to VictoriaMetrics in order to see the list of supported command-line flags with their description:
|
||||
|
||||
```
|
||||
-bigMergeConcurrency int
|
||||
The maximum number of CPU cores to use for big merges. Default value is used if set to 0
|
||||
-csvTrimTimestamp duration
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-dedup.minScrapeInterval duration
|
||||
Remove superfluous samples from time series if they are located closer to each other than this duration. This may be useful for reducing overhead when multiple identically configured Prometheus instances write data to the same VictoriaMetrics. Deduplication is disabled if the -dedup.minScrapeInterval is 0
|
||||
-deleteAuthKey string
|
||||
authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series and /tags/delSeries
|
||||
-denyQueriesOutsideRetention
|
||||
Whether to deny queries outside of the configured -retentionPeriod. When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. This may be useful when multiple data sources with distinct retentions are hidden behind query-tee
|
||||
-dryRun
|
||||
Whether to check only -promscrape.config and then exit. Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-finalMergeDelay duration
|
||||
The delay before starting final merge for per-month partition after no new data is ingested into it. Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. Zero value disables final merge
|
||||
-forceFlushAuthKey string
|
||||
authKey, which must be passed in query string to /internal/force_flush pages
|
||||
-forceMergeAuthKey string
|
||||
authKey, which must be passed in query string to /internal/force_merge pages
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8428")
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-influx.maxLineSize size
|
||||
The maximum size in bytes for a single Influx line during parsing
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for Influx line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
|
||||
-influxSkipSingleField
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if Influx line contains only a single field
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for Influx line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
-logNewSeries
|
||||
Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tightly coupled with -insert.maxQueueDuration (default 16)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-maxLabelsPerTimeseries int
|
||||
The maximum number of labels accepted per time series. Superfluous labels are dropped (default 30)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
The maximum size of OpenTSDB HTTP put request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-opentsdbhttpTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-precisionBits int
|
||||
The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64)
|
||||
-promscrape.cluster.memberNum int
|
||||
The number of this member in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster
|
||||
-promscrape.cluster.membersCount int
|
||||
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://victoriametrics.github.io/#deduplication (default 1)
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. See https://victoriametrics.github.io/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
Checks -promscrape.config file for errors and unsupported fields and then exits. Returns non-zero exit code on parsing errors and emits these errors to stderr. See also -promscrape.config.strictParse command-line flag. Pass -loggerLevel=ERROR if you don't need to see info messages in the output.
|
||||
-promscrape.config.strictParse
|
||||
Whether to allow only supported fields in -promscrape.config . By default unsupported fields are silently skipped
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in '-promscrape.config' file. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
Interval for checking for changes in Consul. This works only if consul_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details (default 30s)
|
||||
-promscrape.disableCompression
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
-promscrape.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets have no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
-promscrape.discovery.concurrency int
|
||||
The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100)
|
||||
-promscrape.discovery.concurrentWaitTime duration
|
||||
The maximum duration for waiting to perform API requests if more than -promscrape.discovery.concurrency requests are simultaneously performed (default 1m0s)
|
||||
-promscrape.dnsSDCheckInterval duration
|
||||
Interval for checking for changes in dns. This works only if dns_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config for details (default 30s)
|
||||
-promscrape.dockerswarmSDCheckInterval duration
|
||||
Interval for checking for changes in dockerswarm. This works only if dockerswarm_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config for details (default 30s)
|
||||
-promscrape.dropOriginalLabels
|
||||
Whether to drop original labels for scrape targets at /targets and /api/v1/targets pages. This may be needed for reducing memory usage when original labels for big number of scrape targets occupy big amounts of memory. Note that this reduces debuggability for improper per-target relabeling configs
|
||||
-promscrape.ec2SDCheckInterval duration
|
||||
Interval for checking for changes in ec2. This works only if ec2_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config for details (default 1m0s)
|
||||
-promscrape.eurekaSDCheckInterval duration
|
||||
Interval for checking for changes in eureka. This works only if eureka_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config for details (default 30s)
|
||||
-promscrape.fileSDCheckInterval duration
|
||||
Interval for checking for changes in 'file_sd_config'. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config for details (default 30s)
|
||||
-promscrape.gceSDCheckInterval duration
|
||||
Interval for checking for changes in gce. This works only if gce_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config for details (default 1m0s)
|
||||
-promscrape.kubernetes.apiServerTimeout duration
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxScrapeSize size
|
||||
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s)
|
||||
-promscrape.streamParse
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
-promscrape.suppressDuplicateScrapeTargetErrors
|
||||
Whether to suppress 'duplicate scrape target' errors; see https://victoriametrics.github.io/vmagent.html#troubleshooting for details
|
||||
-promscrape.suppressScrapeErrors
|
||||
Whether to suppress scrape errors logging. The last error for each target is always available at '/targets' page even if scrape errors logging is suppressed
|
||||
-relabelConfig string
|
||||
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. See https://victoriametrics.github.io/#relabeling for details
|
||||
-retentionPeriod value
|
||||
Data with timestamps outside the retentionPeriod is automatically deleted
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
-search.cacheTimestampOffset duration
|
||||
The maximum duration since the current time for response data, which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses due to time synchronization issues between VictoriaMetrics and data sources (default 5m0s)
|
||||
-search.disableCache
|
||||
Whether to disable response caching. This may be useful during data backfilling
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging (default 5s)
|
||||
-search.maxConcurrentRequests int
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration (default 8)
|
||||
-search.maxExportDuration duration
|
||||
The maximum duration for /api/v1/export call (default 720h0m0s)
|
||||
-search.maxLookback duration
|
||||
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaning due to historical reasons
|
||||
-search.maxPointsPerTimeseries int
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
|
||||
-search.maxQueryDuration duration
|
||||
The maximum duration for query execution (default 30s)
|
||||
-search.maxQueryLen size
|
||||
The maximum search query length in bytes
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
|
||||
-search.maxQueueDuration duration
|
||||
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
|
||||
-search.maxStalenessInterval duration
|
||||
The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons
|
||||
-search.maxStatusRequestDuration duration
|
||||
The maximum duration for /api/v1/status/* requests (default 5m0s)
|
||||
-search.maxStepForPointsAdjustment duration
|
||||
The maximum step when /api/v1/query_range handler adjusts points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data (default 1m0s)
|
||||
-search.maxTagKeys int
|
||||
The maximum number of tag keys returned from /api/v1/labels (default 100000)
|
||||
-search.maxTagValueSuffixesPerSearch int
|
||||
The maximum number of tag value suffixes returned from /metrics/find (default 100000)
|
||||
-search.maxTagValues int
|
||||
The maximum number of tag values returned from /api/v1/label/<label_name>/values (default 100000)
|
||||
-search.maxUniqueTimeseries int
|
||||
The maximum number of unique time series each search can scan (default 300000)
|
||||
-search.minStalenessInterval duration
|
||||
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
|
||||
-search.queryStats.lastQueriesCount int
|
||||
Query stats for /api/v1/status/top_queries is tracked on this number of last queries. Zero value disables query stats tracking (default 20000)
|
||||
-search.queryStats.minQueryDuration duration
|
||||
The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats
|
||||
-search.resetCacheAuthKey string
|
||||
Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call
|
||||
-search.treatDotsAsIsInRegexps
|
||||
Whether to treat dots as is in regexp label filters used in queries. For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter
|
||||
-selfScrapeInstance string
|
||||
Value for 'instance' label, which is added to self-scraped metrics (default "self")
|
||||
-selfScrapeInterval duration
|
||||
Interval for self-scraping own metrics at /metrics page
|
||||
-selfScrapeJob string
|
||||
Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
|
||||
-smallMergeConcurrency int
|
||||
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
|
||||
-snapshotAuthKey string
|
||||
authKey, which must be passed in query string to /snapshot* pages
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storageDataPath string
|
||||
Path to storage data (default "victoria-metrics-data")
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -35,6 +36,7 @@ var (
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
@@ -84,6 +86,9 @@ func main() {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
return false
|
||||
}
|
||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics.</h2></br>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://victoriametrics.github.io/'>https://victoriametrics.github.io/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints: </br>")
|
||||
@@ -115,3 +120,12 @@ func writeAPIHelp(w io.Writer, pathList [][]string) {
|
||||
fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
|
||||
}
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
victoria-metrics is a time series database and monitoring solution.
|
||||
|
||||
See the docs at https://victoriametrics.github.io/
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -58,6 +57,7 @@ var (
|
||||
type test struct {
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
@@ -148,7 +148,7 @@ func setUp() {
|
||||
}
|
||||
|
||||
func processFlags() {
|
||||
envflag.Parse()
|
||||
flag.Parse()
|
||||
for _, fv := range []struct {
|
||||
flag string
|
||||
value string
|
||||
@@ -209,7 +209,7 @@ func testWrite(t *testing.T) {
|
||||
t.Errorf("error compressing %v %s", r, err)
|
||||
t.Fail()
|
||||
}
|
||||
httpWrite(t, testPromWriteHTTPPath, bytes.NewBuffer(data))
|
||||
httpWrite(t, testPromWriteHTTPPath, test.InsertQuery, bytes.NewBuffer(data))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -218,7 +218,7 @@ func testWrite(t *testing.T) {
|
||||
test := x
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
httpWrite(t, testWriteHTTPPath, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
httpWrite(t, testWriteHTTPPath, test.InsertQuery, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
})
|
||||
}
|
||||
})
|
||||
@@ -246,7 +246,7 @@ func testWrite(t *testing.T) {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
logger.Infof("writing %s", test.Data)
|
||||
httpWrite(t, testOpenTSDBWriteHTTPPath, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
httpWrite(t, testOpenTSDBWriteHTTPPath, test.InsertQuery, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
})
|
||||
}
|
||||
})
|
||||
@@ -324,10 +324,10 @@ func readIn(readFor string, t *testing.T, insertTime time.Time) []test {
|
||||
return tt
|
||||
}
|
||||
|
||||
func httpWrite(t *testing.T, address string, r io.Reader) {
|
||||
func httpWrite(t *testing.T, address, query string, r io.Reader) {
|
||||
t.Helper()
|
||||
s := newSuite(t)
|
||||
resp, err := http.Post(address, "", r)
|
||||
resp, err := http.Post(address+query, "", r)
|
||||
s.noError(err)
|
||||
s.noError(resp.Body.Close())
|
||||
s.equalInt(resp.StatusCode, 204)
|
||||
|
||||
12
app/victoria-metrics/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8428
|
||||
ENTRYPOINT ["/victoria-metrics-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY victoria-metrics-${TARGETARCH}-prod ./victoria-metrics-prod
|
||||
@@ -11,6 +11,6 @@
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"item":"y"},"values":[["{TIME_S-1m}","0.5"],["{TIME_S}","0.5"]]}
|
||||
{"metric":{"item":"y"},"values":[["{TIME_S-1m}","0.5"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
17
app/victoria-metrics/testdata/graphite/graphite-selector.json
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"name": "graphite-selector",
|
||||
"issue": "",
|
||||
"data": [
|
||||
"graphite-selector.bar.baz 1 {TIME_S-1m}",
|
||||
"graphite-selector.xxx.yy 2 {TIME_S-1m}",
|
||||
"graphite-selector.bb.cc 3 {TIME_S-1m}",
|
||||
"graphite-selector.a.baz 4 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query?query=sort({__graphite__='graphite-selector.*.baz'})&time={TIME_S-1m}"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[
|
||||
{"metric":{"__name__":"graphite-selector.bar.baz"},"value":["{TIME_S-1m}","1"]},
|
||||
{"metric":{"__name__":"graphite-selector.a.baz"},"value":["{TIME_S-1m}","4"]}
|
||||
]}
|
||||
}
|
||||
}
|
||||
16
app/victoria-metrics/testdata/graphite/name-plus-negative-filter.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"name": "name-plus-negative-filter",
|
||||
"issue": "",
|
||||
"data": [
|
||||
"name-plus-negative-filter;foo=123 1 {TIME_S-1m}",
|
||||
"name-plus-negative-filter;bar=123 2 {TIME_S-1m}",
|
||||
"name-plus-negative-filter;foo=qwe 3 {TIME_S-1m}"
|
||||
],
|
||||
"query": ["/api/v1/query?query={__name__='name-plus-negative-filter',foo!='123'}&time={TIME_S-1m}"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[
|
||||
{"metric":{"__name__":"name-plus-negative-filter","foo":"qwe"},"value":["{TIME_S-1m}","3"]}
|
||||
]}
|
||||
}
|
||||
}
|
||||
@@ -13,6 +13,6 @@
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"x"},"values":[["{TIME_S-2m}","2"]]},
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"y"},"values":[["{TIME_S-2m}","4"],["{TIME_S-1m}","3"],["{TIME_S}","3"]]}
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"y"},"values":[["{TIME_S-2m}","4"],["{TIME_S-1m}","3"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
10
app/victoria-metrics/testdata/influxdb/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"name": "insert_with_extra_labels",
|
||||
"data": ["measurement,tag1=value1,tag2=value2 field6=1.23,field5=123 {TIME_NS}"],
|
||||
"insert_query": "?extra_label=job=test&extra_label=tag2=value10",
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"measurement_field5","tag1":"value1","job": "test","tag2":"value10"},"values":[123], "timestamps": ["{TIME_MS}"]},
|
||||
{"metric":{"__name__":"measurement_field6","tag1":"value1","job": "test","tag2":"value10"},"values":[1.23], "timestamps": ["{TIME_MS}"]}
|
||||
]
|
||||
}
|
||||
9
app/victoria-metrics/testdata/opentsdbhttp/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "insert_with_extra_labels",
|
||||
"data": ["{\"metric\": \"opentsdbhttp.foobar\", \"value\": 1001, \"timestamp\": {TIME_S}, \"tags\": {\"bar\":\"baz\", \"x\": \"y\"}}"],
|
||||
"insert_query": "?extra_label=job=open-test&extra_label=x=z",
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"opentsdbhttp.foobar","bar":"baz","x":"z","job": "open-test"},"values":[1001], "timestamps": ["{TIME_MSZ}"]}
|
||||
]
|
||||
}
|
||||
9
app/victoria-metrics/testdata/prometheus/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "basic_insertion_with_extra_labels",
|
||||
"insert_query": "?extra_label=job=prom-test&extra_label=baz=bar",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"prometheus.foobar\"},{\"name\":\"baz\",\"value\":\"qux\"}],\"samples\":[{\"value\":100000,\"timestamp\":\"{TIME_MS}\"}]}]"],
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"prometheus.foobar","baz":"bar","job": "prom-test"},"values":[100000], "timestamps": ["{TIME_MS}"]}
|
||||
]
|
||||
}
|
||||
8
app/victoria-metrics/testdata/prometheus/with_request_extra_filter.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "basic_select_with_extra_labels",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"prometheus.tenant.limits\"},{\"name\":\"baz\",\"value\":\"qux\"},{\"name\":\"tenant\",\"value\":\"dev\"}],\"samples\":[{\"value\":100000,\"timestamp\":\"{TIME_MS}\"}]},{\"labels\":[{\"name\":\"__name__\",\"value\":\"prometheus.up\"},{\"name\":\"baz\",\"value\":\"qux\"}],\"samples\":[{\"value\":100000,\"timestamp\":\"{TIME_MS}\"}]}]"],
|
||||
"query": ["/api/v1/export?match={__name__!=''}&extra_label=tenant=dev"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"prometheus.tenant.limits","baz":"qux","tenant": "dev"},"values":[100000], "timestamps": ["{TIME_MS}"]}
|
||||
]
|
||||
}
|
||||
@@ -78,3 +78,9 @@ vmagent-local-with-goarch:
|
||||
|
||||
vmagent-pure:
|
||||
APP_NAME=vmagent $(MAKE) app-local-pure
|
||||
|
||||
vmagent-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vmagent $(MAKE) app-local-windows-with-goarch
|
||||
|
||||
vmagent-windows-amd64-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
@@ -1,26 +1,26 @@
|
||||
## vmagent
|
||||
# vmagent
|
||||
|
||||
`vmagent` is a tiny but brave agent, which helps you collect metrics from various sources
|
||||
and stores them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage system that supports the `remote_write` protocol.
|
||||
`vmagent` is a tiny but mighty agent which helps you collect metrics from various sources
|
||||
and store them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage systems that support the `remote_write` protocol.
|
||||
|
||||
<img alt="vmagent" src="vmagent.png">
|
||||
|
||||
|
||||
### Motivation
|
||||
## Motivation
|
||||
|
||||
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
|
||||
Also, we found that users’ infrastructure are snowflakes - no two are alike, and we decided to add more flexibility
|
||||
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters into VictoriaMetrics.
|
||||
Also, we found that our users' infrastructures are like snowflakes in that no two are alike. Therefore we decided to add more flexibility
|
||||
to `vmagent` such as the ability to push metrics instead of pulling them. We did our best and will continue to improve vmagent.
|
||||
|
||||
|
||||
### Features
|
||||
## Features
|
||||
|
||||
* Can be used as drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
|
||||
* Can be used as a drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
|
||||
See [Quick Start](#quick-start) for details.
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Accepts data via all the ingestion protocols supported by VictoriaMetrics:
|
||||
* Accepts data via all ingestion protocols supported by VictoriaMetrics:
|
||||
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
|
||||
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
|
||||
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-opentsdb-compatible-agents).
|
||||
@@ -30,19 +30,20 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
|
||||
* Data in Prometheus exposition format. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
* Arbitrary CSV data via `http://<vmagent>:8429/api/v1/import/csv`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-csv-data).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems.
|
||||
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
|
||||
to remote storage is recovered. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared to Prometheus.
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared with Prometheus.
|
||||
* Scrape targets can be spread among multiple `vmagent` instances when a large number of targets must be scraped. See [these docs](#scraping-big-number-of-targets) for details.
|
||||
|
||||
|
||||
### Quick Start
|
||||
## Quick Start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
|
||||
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
|
||||
Please download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
|
||||
and pass the following flags to the `vmagent` binary in order to start scraping Prometheus targets:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary number of remote storage systems.
|
||||
* `-promscrape.config` with the path to Prometheus config file (usually located at `/etc/prometheus/prometheus.yml`)
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times to replicate data concurrently to an arbitrary number of remote storage systems.
|
||||
|
||||
Example command line:
|
||||
|
||||
@@ -50,7 +51,7 @@ Example command line:
|
||||
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
If you only need to collect Influx data, then the following is sufficient:
|
||||
If you only need to collect Influx data, then the following command is sufficient:
|
||||
|
||||
```
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
@@ -60,10 +61,10 @@ Then send Influx data to `http://vmagent-host:8429`. See [these docs](https://vi
|
||||
|
||||
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags).
|
||||
|
||||
Pass `-help` to `vmagent` in order to see the full list of supported command-line flags with their descriptions.
|
||||
Pass `-help` to `vmagent` in order to see [the full list of supported command-line flags with their descriptions](#advanced-usage).
|
||||
|
||||
|
||||
### Configuration update
|
||||
## Configuration update
|
||||
|
||||
`vmagent` should be restarted in order to update config options set via command-line args.
|
||||
|
||||
@@ -79,141 +80,149 @@ Pass `-help` to `vmagent` in order to see the full list of supported command-lin
|
||||
There is also `-promscrape.configCheckInterval` command-line option, which can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
|
||||
|
||||
### Use cases
|
||||
## Use cases
|
||||
|
||||
|
||||
#### IoT and Edge monitoring
|
||||
### IoT and Edge monitoring
|
||||
|
||||
`vmagent` can run and collect metrics in IoT and industrial networks with unreliable or scheduled connections to the remote storage.
|
||||
`vmagent` can run and collect metrics in IoT and industrial networks with unreliable or scheduled connections to their remote storage.
|
||||
It buffers the collected data in local files until the connection to remote storage becomes available and then sends the buffered
|
||||
data to the remote storage. It re-tries sending the data to remote storage on any errors.
|
||||
data to the remote storage. It re-tries sending the data to remote storage until any errors are resolved.
|
||||
The maximum buffer size can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
`vmagent` works on various architectures from IoT world - 32-bit arm, 64-bit arm, ppc64, 386, amd64.
|
||||
`vmagent` works on various architectures from the IoT world - 32-bit arm, 64-bit arm, ppc64, 386, amd64.
|
||||
See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/Makefile) for details.
|
||||
|
||||
|
||||
#### Drop-in replacement for Prometheus
|
||||
### Drop-in replacement for Prometheus
|
||||
|
||||
If you use Prometheus only for scraping metrics from various targets and forwarding these metrics to remote storage,
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such a setup.
|
||||
If you use Prometheus only for scraping metrics from various targets and forwarding those metrics to remote storage
|
||||
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
|
||||
#### Replication and high availability
|
||||
### Replication and high availability
|
||||
|
||||
`vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instances.
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again.
|
||||
Then it sends the buffered data to the remote storage in order to prevent data gaps in the remote storage.
|
||||
If a single remote storage instance is temporarily out of service, then the collected data remains available in another remote storage instance.
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again and then it sends the buffered data to the remote storage in order to prevent data gaps.
|
||||
|
||||
|
||||
#### Relabeling and filtering
|
||||
### Relabeling and filtering
|
||||
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to remote storage. Additionally,
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to the remote storage. Additionally,
|
||||
it can remove unwanted samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
See [these docs](#relabeling) for details.
|
||||
Please see [these docs](#relabeling) for details.
|
||||
|
||||
|
||||
#### Splitting data streams among multiple systems
|
||||
### Splitting data streams among multiple systems
|
||||
|
||||
`vmagent` supports splitting the collected data between multiple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For instance, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For example, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and a real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive its own subset of the collected data due to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
|
||||
|
||||
#### Prometheus remote_write proxy
|
||||
### Prometheus remote_write proxy
|
||||
|
||||
`vmagent` may be used as a proxy for Prometheus data sent via Prometheus `remote_write` protocol. It can accept data via `remote_write` API
|
||||
at `/api/v1/write` endpoint, apply relabeling and filtering and then proxy it to another `remote_write` systems.
|
||||
`vmagent` can be used as a proxy for Prometheus data sent via Prometheus `remote_write` protocol. It can accept data via the `remote_write` API
|
||||
at the `/api/v1/write` endpoint, then apply relabeling and filtering and proxy it to another `remote_write` system.
|
||||
The `vmagent` can be configured to encrypt the incoming `remote_write` requests with `-tls*` command-line flags.
|
||||
Additionally, Basic Auth can be enabled for the incoming `remote_write` requests with `-httpAuth.*` command-line flags.
|
||||
Also, Basic Auth can be enabled for the incoming `remote_write` requests with `-httpAuth.*` command-line flags.
|
||||
|
||||
|
||||
### remote_write for clustered version
|
||||
|
||||
### How to collect metrics in Prometheus format
|
||||
While `vmagent` can accept data in several supported protocols (OpenTSDB, Influx, Prometheus, Graphite) and scrape data from various targets, writes are always performed in Prometheus remote_write protocol. Therefore for the [clustered version](https://victoriametrics.github.io/Cluster-VictoriaMetrics.html), the `-remoteWrite.url` command-line flag should be configured as `<schema>://<vminsert-host>:8480/insert/<customer-id>/prometheus/api/v1/write`
|
||||
|
||||
Pass the path to `prometheus.yml` to `-promscrape.config` command-line flag. `vmagent` takes into account the following
|
||||
|
||||
## How to collect metrics in Prometheus format
|
||||
|
||||
Specify the path to `prometheus.yml` file via `-promscrape.config` command-line flag. `vmagent` takes into account the following
|
||||
sections from [Prometheus config file](https://prometheus.io/docs/prometheus/latest/configuration/configuration/):
|
||||
|
||||
* `global`
|
||||
* `scrape_configs`
|
||||
|
||||
All the other sections are ignored, including [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section.
|
||||
Use `-remoteWrite.*` command-line flags instead for configuring remote write settings.
|
||||
All other sections are ignored, including the [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section.
|
||||
Use `-remoteWrite.*` command-line flags instead for configuring remote write settings.
|
||||
|
||||
The following scrape types in [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) section are supported:
|
||||
|
||||
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
|
||||
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discover.
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
|
||||
* `static_configs` - is for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
|
||||
* `file_sd_configs` - is for scraping targets defined in external files (aka file-based service discovery).
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
|
||||
* `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
|
||||
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
|
||||
* `ec2_sd_configs` - for scraping targets in Amazon EC2.
|
||||
* `ec2_sd_configs` - is for scraping targets in Amazon EC2.
|
||||
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
|
||||
`vmagent` doesn't support `profile` config param and aws credentials file yet.
|
||||
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
|
||||
`vmagent` doesn't support the `profile` config param yet.
|
||||
* `gce_sd_configs` - is for scraping targets in Google Compute Engine (GCE).
|
||||
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
|
||||
`vmagent` provides the following additional functionality for `gce_sd_config`:
|
||||
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
|
||||
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
|
||||
* if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
|
||||
* `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
|
||||
* `consul_sd_configs` - for scraping targets registered in Consul.
|
||||
* if `project` arg is missing then `vmagent` uses the project for the instance where it runs;
|
||||
* if `zone` arg is missing then `vmagent` uses the zone for the instance where it runs;
|
||||
* if `zone` arg is equal to `"*"`, then `vmagent` discovers all the zones for the given project;
|
||||
* `zone` may contain an arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
|
||||
* `consul_sd_configs` - is for scraping the targets registered in Consul.
|
||||
See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details.
|
||||
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
* `dns_sd_configs` - is for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
|
||||
* `openstack_sd_configs` - for scraping OpenStack targets.
|
||||
* `openstack_sd_configs` - is for scraping OpenStack targets.
|
||||
See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details.
|
||||
Only [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported.
|
||||
* `dockerswarm_sd_configs` - for scraping Docker Swarm targets.
|
||||
* `dockerswarm_sd_configs` - is for scraping Docker Swarm targets.
|
||||
See [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) for details.
|
||||
* `eureka_sd_configs` - for scraping targets registered in [Netflix Eureka](https://github.com/Netflix/eureka).
|
||||
* `eureka_sd_configs` - is for scraping targets registered in [Netflix Eureka](https://github.com/Netflix/eureka).
|
||||
See [eureka_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config) for details.
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
Please file feature requests to [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
|
||||
`vmagent` also support the following additional options in `scrape_config` section:
|
||||
`vmagent` also supports the following additional options in `scrape_configs` section:
|
||||
|
||||
* `disable_compression: true` - for disabling response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets
|
||||
in order to save network bandwidth.
|
||||
* `disable_keepalive: true` - for disabling [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis.
|
||||
By default `vmagent` uses keep-alive connections to scrape targets in order to reduce overhead on connection re-establishing.
|
||||
* `disable_compression: true` - to disable response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets
|
||||
to save network bandwidth.
|
||||
* `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis.
|
||||
By default, `vmagent` uses keep-alive connections to scrape targets to reduce overhead on connection re-establishing.
|
||||
* `stream_parse: true` - for scraping targets in a streaming manner. This may be useful for targets exporting a big number of metrics.
|
||||
|
||||
Note that `vmagent` doesn't support `refresh_interval` option these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
Note that `vmagent` doesn't support `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for the `-promscrape.*CheckInterval` flags.
|
||||
|
||||
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
The file pointed to by `-promscrape.config` may contain `%{ENV_VAR}` placeholders which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
|
||||
|
||||
### Adding labels to metrics
|
||||
## Adding labels to metrics
|
||||
|
||||
Labels can be added to metrics via the following mechanisms:
|
||||
Labels can be added to metrics by the following mechanisms:
|
||||
|
||||
* Via `global -> external_labels` section in `-promscrape.config` file. These labels are added only to metrics scraped from targets configured in `-promscrape.config` file.
|
||||
* Via `-remoteWrite.label` command-line flag. These labels are added to all the collected metrics before sending them to `-remoteWrite.url`.
|
||||
* The `global -> external_labels` section in `-promscrape.config` file. These labels are added only to metrics scraped from targets configured in the `-promscrape.config` file. They aren't added to metrics collected via other [data ingestion protocols](https://victoriametrics.github.io/#how-to-import-time-series-data).
|
||||
* The `-remoteWrite.label` command-line flag. These labels are added to all the collected metrics before sending them to `-remoteWrite.url`. For example, the following command will start `vmagent`, which will add `{datacenter="foobar"}` label to all the metrics pushed to all the configured remote storage systems (all the `-remoteWrite.url` flag values):
|
||||
|
||||
```
|
||||
/path/to/vmagent -remoteWrite.label=datacenter=foobar ...
|
||||
```
|
||||
|
||||
|
||||
### Relabeling
|
||||
## Relabeling
|
||||
|
||||
`vmagent` supports [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
|
||||
Additionally it provides the following extra actions:
|
||||
It also provides the following extra actions:
|
||||
|
||||
* `replace_all`: replaces all the occurences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
|
||||
* `labelmap_all`: replaces all the occurences of `regex` in all the label names with the `replacement`.
|
||||
* `keep_if_equal`: keeps the entry if all label values from `source_labels` are equal.
|
||||
* `replace_all`: replaces all of the occurrences of `regex` in the values of `source_labels` with the `replacement` and stores the results in the `target_label`.
|
||||
* `labelmap_all`: replaces all of the occurrences of `regex` in all the label names with the `replacement`.
|
||||
* `keep_if_equal`: keeps the entry if all the label values from `source_labels` are equal.
|
||||
* `drop_if_equal`: drops the entry if all the label values from `source_labels` are equal.
|
||||
|
||||
The relabeling can be defined in the following places:
|
||||
|
||||
* At `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to target labels.
|
||||
* At `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to all the scraped metrics in the given `scrape_config`.
|
||||
* At `-remoteWrite.relabelConfig` file. This relabeling is aplied to all the collected metrics before sending them to remote storage.
|
||||
* At `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`.
|
||||
* At the `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to target labels.
|
||||
* At the `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to all the scraped metrics in the given `scrape_config`.
|
||||
* At the `-remoteWrite.relabelConfig` file. This relabeling is applied to all the collected metrics before sending them to remote storage.
|
||||
* At the `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`.
|
||||
|
||||
Read more about relabeling in the following articles:
|
||||
You can read more about relabeling in the following articles:
|
||||
|
||||
* [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
|
||||
* [Life of a label](https://www.robustperception.io/life-of-a-label)
|
||||
@@ -223,44 +232,108 @@ Read more about relabeling in the following articles:
|
||||
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
|
||||
|
||||
|
||||
### Monitoring
|
||||
## Scraping big number of targets
|
||||
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
|
||||
A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc.
|
||||
In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling and clustering).
|
||||
Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values.
|
||||
The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster.
|
||||
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands
|
||||
spread scrape targets among a cluster of two `vmagent` instances:
|
||||
|
||||
```
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
|
||||
```
|
||||
|
||||
By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
|
||||
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
|
||||
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
|
||||
|
||||
```
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
|
||||
```
|
||||
|
||||
If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at remote storage pointed by `-remoteWrite.url`.
|
||||
See [these docs](https://victoriametrics.github.io/#deduplication) for details.
|
||||
|
||||
|
||||
## Scraping targets via a proxy
|
||||
|
||||
`vmagent` supports scraping targets via http, https and socks5 proxies. Proxy address must be specified in `proxy_url` option. For example, the following scrape config instructs
|
||||
target scraping via https proxy at `https://proxy-addr:1234`:
|
||||
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
proxy_url: https://proxy-addr:1234
|
||||
```
|
||||
|
||||
Proxy can be configured with the following optional settings:
|
||||
|
||||
* `proxy_authorization` for generic token authorization. See [Prometheus docs for details on authorization section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config)
|
||||
* `proxy_bearer_token` and `proxy_bearer_token_file` for Bearer token authorization
|
||||
* `proxy_basic_auth` for Basic authorization. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
|
||||
* `proxy_tls_config` for TLS config. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config).
|
||||
|
||||
For example:
|
||||
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
proxy_url: https://proxy-addr:1234
|
||||
proxy_basic_auth:
|
||||
username: foobar
|
||||
password: secret
|
||||
proxy_tls_config:
|
||||
insecure_skip_verify: true
|
||||
cert_file: /path/to/cert
|
||||
key_file: /path/to/key
|
||||
ca_file: /path/to/ca
|
||||
server_name: real-server-name
|
||||
```
|
||||
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. We recommend setting up regular scraping of this page
|
||||
either through `vmagent` itself or by Prometheus so that the exported metrics may be analyzed later.
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
|
||||
If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
|
||||
|
||||
`vmagent` also exports target statuses at the following handlers:
|
||||
`vmagent` also exports the status for various targets at the following handlers:
|
||||
|
||||
* `http://vmagent-host:8429/targets`. This handler returns human-readable plaintext status for every active target.
|
||||
This page is convenient to query from command line with `wget`, `curl` or similar tools.
|
||||
It accepts optional `show_original_labels=1` query arg, which shows the original labels per each target before applying relabeling.
|
||||
* `http://vmagent-host:8429/targets`. This handler returns human-readable status for every active target.
|
||||
This page is easy to query from the command line with `wget`, `curl` or similar tools.
|
||||
It accepts optional `show_original_labels=1` query arg which shows the original labels per each target before applying the relabeling.
|
||||
This information may be useful for debugging target relabeling.
|
||||
* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets).
|
||||
|
||||
* `http://vmagent-host:8429/ready`. This handler returns http 200 status code when `vmagent` finishes initialization for all service_discovery configs.
|
||||
It may be useful for performing `vmagent` rolling update without scrape loss.
|
||||
* `http://vmagent-host:8429/ready`. This handler returns http 200 status code when `vmagent` finishes its initialization for all service_discovery configs.
|
||||
It may be useful to perform `vmagent` rolling update without any scrape loss.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
## Troubleshooting
|
||||
|
||||
* It is recommended [setting up the official Grafana dashboard](#monitoring) in order to monitor `vmagent` state.
|
||||
* We recommend you [set up the official Grafana dashboard](#monitoring) in order to monitor the state of `vmagent`.
|
||||
|
||||
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
|
||||
since `vmagent` establishes at least a single TCP connection per each target.
|
||||
* We recommend you increase the maximum number of open files in the system (`ulimit -n`) when scraping a big number of targets,
|
||||
as `vmagent` establishes at least a single TCP connection per target.
|
||||
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood the error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`
|
||||
and `http://vmagent-host:8429/api/v1/targets`.
|
||||
|
||||
* The `/api/v1/targets` page could be useful for debugging relabeling process for scrape targets.
|
||||
This page contains original labels for targets dropped during relabeling (see "droppedTargets" section in the page output). By default up to `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling, then increase `-promscrape.maxDroppedTargets` command-line flag value in order to see all the dropped targets. Note that tracking each dropped target requires up to 10Kb of RAM, so big values for `-promscrape.maxDroppedTargets` may result in increased memory usage if big number of scrape targets are dropped during relabeling.
|
||||
This page contains original labels for targets dropped during relabeling (see "droppedTargets" section in the page output). By default up to `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling, then increase `-promscrape.maxDroppedTargets` command-line flag value to see all the dropped targets. Note that tracking each dropped target requires up to 10Kb of RAM. Therefore big values for `-promscrape.maxDroppedTargets` may result in increased memory usage if a big number of scrape targets are dropped during relabeling.
|
||||
|
||||
* If `vmagent` scrapes big number of targets, then `-promscrape.dropOriginalLabels` command-line option may be passed to `vmagent` in order to reduce memory usage.
|
||||
* If `vmagent` scrapes a big number of targets then the `-promscrape.dropOriginalLabels` command-line option may be passed to `vmagent` in order to reduce memory usage.
|
||||
This option drops `"discoveredLabels"` and `"droppedTargets"` lists at `/api/v1/targets` page, which may result in reduced debuggability for improperly configured per-target relabeling.
|
||||
|
||||
* If `vmagent` scrapes targets with millions of metrics per each target (for instance, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)),
|
||||
then it is recommended enabling `stream parsing mode` in order to reduce memory usage during scraping. This mode may be enabled either globally for all the scrape targets
|
||||
* If `vmagent` scrapes targets with millions of metrics per target (for example, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)),
|
||||
we recommend enabling `stream parsing mode` in order to reduce memory usage during scraping. This mode may be enabled either globally for all of the scrape targets
|
||||
by passing `-promscrape.streamParse` command-line flag or on a per-scrape target basis with `stream_parse: true` option. For example:
|
||||
|
||||
```yml
|
||||
@@ -277,33 +350,51 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
|
||||
'match[]': ['{__name__!=""}']
|
||||
```
|
||||
|
||||
Note that `sample_limit` option doesn't work if stream parsing is enabled, since the parsed data is pushed to remote storage as soon as it is parsed. So `sample_limit` option
|
||||
has no sense during stream parsing.
|
||||
Note that `sample_limit` option doesn't work if stream parsing is enabled because the parsed data is pushed to remote storage as soon as it is parsed. Therefore the `sample_limit` option
|
||||
doesn't make sense during stream parsing.
|
||||
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
* We recommend you increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page grows constantly.
|
||||
|
||||
* If you see gaps on the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, then try increasing `-remoteWrite.queues`.
|
||||
Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage, so it starts dropping the buffered data
|
||||
* If you see gaps in the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, try increasing `-remoteWrite.queues`.
|
||||
Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage. Therefore it starts dropping the buffered data
|
||||
if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
* `vmagent` drops data blocks if remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses. The number of dropped blocks can be monitored via `vmagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
|
||||
|
||||
* Use `-remoteWrite.queues=1` when `-remoteWrite.url` points to remote storage, which doesn't accept out-of-order samples (aka data backfilling). Such storage systems include Prometheus, Cortex and Thanos.
|
||||
|
||||
* `vmagent` buffers scraped data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
|
||||
If you don't want to send all the data from the directory to remote storage then simply stop `vmagent` and delete the directory.
|
||||
|
||||
* By default `vmagent` masks `-remoteWrite.url` with `secret-url` values in logs and at `/metrics` page because
|
||||
the url may contain sensitive information such as auth tokens or passwords.
|
||||
Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
|
||||
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports
|
||||
or they use init container. These errors can be either fixed or suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
|
||||
See available options below if you prefer fixing the root cause of the error:
|
||||
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at the beginning of some interval,
|
||||
then `scrape_align_interval` option must be used. For example, the following config aligns hourly scrapes to the beginning of hour:
|
||||
|
||||
The following `relabel_configs` section may help determining `__meta_*` labels resulting in duplicate targets:
|
||||
```yml
|
||||
- action: labelmap
|
||||
regex: __meta_(.*)
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
scrape_interval: 1h
|
||||
scrape_align_interval: 1h
|
||||
```
|
||||
|
||||
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at specific offset, then `scrape_offset` option must be used.
|
||||
For example, the following config instructs `vmagent` to scrape the target at 10 seconds of every minute:
|
||||
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
scrape_interval: 1m
|
||||
scrape_offset: 10s
|
||||
```
|
||||
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen to multiple ports
|
||||
or they use an init container. These errors can either be fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
|
||||
See the available options below if you prefer fixing the root cause of the error:
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out pods with unneeded ports:
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
@@ -318,27 +409,27 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
|
||||
```
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in `vmutils-*` archives there.
|
||||
We recommend using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in the `vmutils-*` archives.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent` from the root folder of the repository.
|
||||
It builds `vmagent` binary and puts it into the `bin` folder.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmagent` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds the `vmagent` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmagent-prod` from the root folder of the repository.
|
||||
2. Run `make vmagent-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmagent-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on source code in [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
@@ -348,34 +439,34 @@ by setting it via `<ROOT_IMAGE>` environment variable. For example, the followin
|
||||
ROOT_IMAGE=scratch make package-vmagent
|
||||
```
|
||||
|
||||
#### ARM build
|
||||
### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent-arm` or `make vmagent-arm64` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmagent-arm` or `make vmagent-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmagent-arm` or `vmagent-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmagent-arm-prod` or `make vmagent-arm64-prod` from the root folder of the repository.
|
||||
2. Run `make vmagent-arm-prod` or `make vmagent-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmagent-arm-prod` or `vmagent-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
### Profiling
|
||||
## Profiling
|
||||
|
||||
`vmagent` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
* Memory profile. It can be collected with the following command:
|
||||
* Memory profile can be collected with the following command:
|
||||
|
||||
```bash
|
||||
curl -s http://<vmagent-host>:8429/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
* CPU profile. It can be collected with the following command:
|
||||
* CPU profile can be collected with the following command:
|
||||
|
||||
```bash
|
||||
curl -s http://<vmagent-host>:8429/debug/pprof/profile > cpu.pprof
|
||||
@@ -384,3 +475,242 @@ curl -s http://<vmagent-host>:8429/debug/pprof/profile > cpu.pprof
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
|
||||
|
||||
## Advanced usage
|
||||
|
||||
`vmagent` can be fine-tuned with various command-line flags. Run `./vmagent -help` in order to see the full list of these flags with their descriptions and default values:
|
||||
|
||||
```
|
||||
./vmagent -help
|
||||
|
||||
vmagent collects metrics data via popular data ingestion protocols and routes them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://victoriametrics.github.io/vmagent.html .
|
||||
|
||||
-csvTrimTimestamp duration
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-dryRun
|
||||
Whether to check only config files without running vmagent. The following files are checked: -promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr='' (default ":8429")
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-influx.maxLineSize size
|
||||
The maximum size in bytes for a single Influx line during parsing
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
|
||||
-influxSkipSingleField
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if Influx line contains only a single field
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for Influx line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tightly coupled with -insert.maxQueueDuration (default 16)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
The maximum size of OpenTSDB HTTP put request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-opentsdbhttpTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-promscrape.cluster.memberNum int
|
||||
The number of this member in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster
|
||||
-promscrape.cluster.membersCount int
|
||||
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://victoriametrics.github.io/#deduplication (default 1)
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. See https://victoriametrics.github.io/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
Checks -promscrape.config file for errors and unsupported fields and then exits. Returns non-zero exit code on parsing errors and emits these errors to stderr. See also -promscrape.config.strictParse command-line flag. Pass -loggerLevel=ERROR if you don't need to see info messages in the output.
|
||||
-promscrape.config.strictParse
|
||||
Whether to allow only supported fields in -promscrape.config . By default unsupported fields are silently skipped
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in '-promscrape.config' file. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
Interval for checking for changes in Consul. This works only if consul_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details (default 30s)
|
||||
-promscrape.disableCompression
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
-promscrape.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets have no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
-promscrape.discovery.concurrency int
|
||||
The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100)
|
||||
-promscrape.discovery.concurrentWaitTime duration
|
||||
The maximum duration for waiting to perform API requests if more than -promscrape.discovery.concurrency requests are simultaneously performed (default 1m0s)
|
||||
-promscrape.dnsSDCheckInterval duration
|
||||
Interval for checking for changes in dns. This works only if dns_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config for details (default 30s)
|
||||
-promscrape.dockerswarmSDCheckInterval duration
|
||||
Interval for checking for changes in dockerswarm. This works only if dockerswarm_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config for details (default 30s)
|
||||
-promscrape.dropOriginalLabels
|
||||
Whether to drop original labels for scrape targets at /targets and /api/v1/targets pages. This may be needed for reducing memory usage when original labels for big number of scrape targets occupy big amounts of memory. Note that this reduces debuggability for improper per-target relabeling configs
|
||||
-promscrape.ec2SDCheckInterval duration
|
||||
Interval for checking for changes in ec2. This works only if ec2_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config for details (default 1m0s)
|
||||
-promscrape.eurekaSDCheckInterval duration
|
||||
Interval for checking for changes in eureka. This works only if eureka_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config for details (default 30s)
|
||||
-promscrape.fileSDCheckInterval duration
|
||||
Interval for checking for changes in 'file_sd_config'. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config for details (default 30s)
|
||||
-promscrape.gceSDCheckInterval duration
|
||||
Interval for checking for changes in gce. This works only if gce_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config for details (default 1m0s)
|
||||
-promscrape.kubernetes.apiServerTimeout duration
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxScrapeSize size
|
||||
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s)
|
||||
-promscrape.streamParse
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
-promscrape.suppressDuplicateScrapeTargetErrors
|
||||
Whether to suppress 'duplicate scrape target' errors; see https://victoriametrics.github.io/vmagent.html#troubleshooting for details
|
||||
-promscrape.suppressScrapeErrors
|
||||
Whether to suppress scrape errors logging. The last error for each target is always available at '/targets' page even if scrape errors logging is suppressed
|
||||
-remoteWrite.basicAuth.password array
|
||||
Optional basic auth password to use for -remoteWrite.url. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.basicAuth.username array
|
||||
Optional basic auth username to use for -remoteWrite.url. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.bearerToken array
|
||||
Optional bearer auth token to use for -remoteWrite.url. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.flushInterval duration
|
||||
Interval for flushing the data to remote storage. This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url (default 1s)
|
||||
-remoteWrite.label array
|
||||
Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.maxBlockSize size
|
||||
The maximum size in bytes of unpacked request to send to remote storage. It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608)
|
||||
-remoteWrite.maxDiskUsagePerURL size
|
||||
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. Disk usage is unlimited if the value is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-remoteWrite.proxyURL array
|
||||
Optional proxy URL for writing data to -remoteWrite.url. Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.queues int
|
||||
The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues isn't enough for sending high volume of collected data to remote storage (default 4)
|
||||
-remoteWrite.rateLimit array
|
||||
Optional rate limit in bytes per second for data sent to -remoteWrite.url. By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.relabelConfig string
|
||||
Optional path to file with relabel_config entries. These entries are applied to all the metrics before sending them to -remoteWrite.url. See https://victoriametrics.github.io/vmagent.html#relabeling for details
|
||||
-remoteWrite.roundDigits array
|
||||
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.sendTimeout array
|
||||
Timeout for sending a single block of data to -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.significantFigures array
|
||||
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsInsecureSkipVerify array
|
||||
Whether to skip tls verification when connecting to -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsServerName array
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tmpDataPath string
|
||||
Path to directory where temporary data for remote write component is stored. See also -remoteWrite.maxDiskUsagePerURL (default "vmagent-remotewrite-data")
|
||||
-remoteWrite.url array
|
||||
Remote storage URL to write data to. It must support Prometheus remote_write API. It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url flags in order to write data concurrently to multiple remote storage systems
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.urlRelabelConfig array
|
||||
Optional path to relabel config for the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. For example, m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -33,7 +34,9 @@ var (
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, false, "", "", insertRows)
|
||||
return parser.ParseStream(r, false, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(db, rows, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -41,17 +44,23 @@ func InsertHandlerForReader(r io.Reader) error {
|
||||
//
|
||||
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
|
||||
func InsertHandlerForHTTP(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, insertRows)
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(db, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(db string, rows []parser.Row) error {
|
||||
func insertRows(db string, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := getPushCtx()
|
||||
defer putPushCtx(ctx)
|
||||
|
||||
@@ -82,11 +91,13 @@ func insertRows(db string, rows []parser.Row) error {
|
||||
Value: db,
|
||||
})
|
||||
}
|
||||
commonLabels = append(commonLabels, extraLabels...)
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
if !*skipMeasurement {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, r.Measurement...)
|
||||
}
|
||||
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1139
|
||||
skipFieldKey := len(r.Measurement) > 0 && len(r.Fields) == 1 && *skipSingleField
|
||||
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/influxutils"
|
||||
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
|
||||
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
|
||||
opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
|
||||
@@ -40,7 +41,7 @@ var (
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to `http://<vmagent>:8429/write`")
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
@@ -144,6 +145,9 @@ func main() {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
return false
|
||||
}
|
||||
fmt.Fprintf(w, "vmagent - see docs at https://victoriametrics.github.io/vmagent.html")
|
||||
return true
|
||||
}
|
||||
@@ -204,10 +208,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/query":
|
||||
// Emulate fake response for influx query.
|
||||
// This is required for TSBS benchmark.
|
||||
influxQueryRequests.Inc()
|
||||
fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
case "/targets":
|
||||
promscrapeTargetsRequests.Inc()
|
||||
|
||||
12
app/vmagent/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmagent-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmagent-${TARGETARCH}-prod ./vmagent-prod
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -19,12 +20,18 @@ var (
|
||||
// InsertHandler processes HTTP OpenTSDB put requests.
|
||||
// See http://opentsdb.net/docs/build/html/api_http/put.html
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -45,6 +52,7 @@ func insertRows(rows []parser.Row) error {
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
|
||||
@@ -31,7 +31,7 @@ func InsertHandler(req *http.Request) error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
})
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -20,12 +21,18 @@ var (
|
||||
|
||||
// InsertHandler processes remote write for prometheus.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(tss, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
func insertRows(timeseries []prompb.TimeSeries, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -44,6 +51,7 @@ func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for i := range ts.Samples {
|
||||
sample := &ts.Samples[i]
|
||||
|
||||
@@ -16,10 +16,14 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to -remoteWrite.url. "+
|
||||
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArray("remoteWrite.proxyURL", "Optional proxy URL for writing data to -remoteWrite.url. Supported proxies: http, https, socks5. "+
|
||||
"Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
@@ -49,6 +53,8 @@ type client struct {
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
rl rateLimiter
|
||||
|
||||
bytesSent *metrics.Counter
|
||||
blocksSent *metrics.Counter
|
||||
requestDuration *metrics.Histogram
|
||||
@@ -113,6 +119,12 @@ func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqu
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
c.rl.perSecondLimit = int64(bytesPerSec)
|
||||
}
|
||||
c.rl.limitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remote_write_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
|
||||
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
|
||||
@@ -148,7 +160,7 @@ func getTLSConfig(argIdx int) (*tls.Config, error) {
|
||||
if c.CAFile == "" && c.CertFile == "" && c.KeyFile == "" && c.ServerName == "" && !c.InsecureSkipVerify {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", c)
|
||||
cfg, err := promauth.NewConfig(".", nil, nil, "", "", c)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %w", err)
|
||||
}
|
||||
@@ -159,36 +171,46 @@ func getTLSConfig(argIdx int) (*tls.Config, error) {
|
||||
func (c *client) runWorker() {
|
||||
var ok bool
|
||||
var block []byte
|
||||
ch := make(chan struct{})
|
||||
ch := make(chan bool, 1)
|
||||
for {
|
||||
block, ok = c.fq.MustReadBlock(block[:0])
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
c.sendBlock(block)
|
||||
ch <- struct{}{}
|
||||
ch <- c.sendBlock(block)
|
||||
}()
|
||||
select {
|
||||
case <-ch:
|
||||
// The block has been sent successfully
|
||||
continue
|
||||
case ok := <-ch:
|
||||
if ok {
|
||||
// The block has been sent successfully
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
graceDuration := 5 * time.Second
|
||||
select {
|
||||
case <-ch:
|
||||
// The block has been sent successfully.
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
logger.Errorf("couldn't sent block with size %d bytes to %q in %.3f seconds during shutdown; dropping it",
|
||||
len(block), c.sanitizedURL, graceDuration.Seconds())
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlock(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *client) sendBlock(block []byte) {
|
||||
// sendBlock returns false only if c.stopCh is closed.
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlock(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
retriesCount := 0
|
||||
c.bytesSent.Add(len(block))
|
||||
@@ -219,12 +241,13 @@ again:
|
||||
}
|
||||
logger.Errorf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := time.NewTimer(retryDuration)
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
t.Stop()
|
||||
return
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
@@ -233,18 +256,18 @@ again:
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
return
|
||||
return true
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
if statusCode == 409 {
|
||||
if statusCode == 409 || statusCode == 400 {
|
||||
// Just drop block on 409 status code like Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
// drop block on 400 status code,
|
||||
// not expected that remote server will be able to handle it on retry
|
||||
// should fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149
|
||||
_ = resp.Body.Close()
|
||||
logger.Errorf("unexpected status code received when sending a block with size %d bytes to %q: #%d; dropping the block like Prometheus does; "+
|
||||
"response body=%q", len(block), c.sanitizedURL, statusCode, body)
|
||||
c.packetsDropped.Inc()
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
// Unexpected status code returned
|
||||
@@ -261,13 +284,56 @@ again:
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
t := time.NewTimer(retryDuration)
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
t.Stop()
|
||||
return
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
type rateLimiter struct {
|
||||
perSecondLimit int64
|
||||
|
||||
// mu protects budget and deadline from concurrent access.
|
||||
mu sync.Mutex
|
||||
|
||||
// The current budget. It is increased by perSecondLimit every second.
|
||||
budget int64
|
||||
|
||||
// The next deadline for increasing the budget by perSecondLimit
|
||||
deadline time.Time
|
||||
|
||||
limitReached *metrics.Counter
|
||||
}
|
||||
|
||||
func (rl *rateLimiter) register(dataLen int, stopCh <-chan struct{}) {
|
||||
limit := rl.perSecondLimit
|
||||
if limit <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
rl.mu.Lock()
|
||||
defer rl.mu.Unlock()
|
||||
|
||||
for rl.budget <= 0 {
|
||||
if d := time.Until(rl.deadline); d > 0 {
|
||||
rl.limitReached.Inc()
|
||||
t := timerpool.Get(d)
|
||||
select {
|
||||
case <-stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
}
|
||||
rl.budget += limit
|
||||
rl.deadline = time.Now().Add(time.Second)
|
||||
}
|
||||
rl.budget -= int64(dataLen)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
@@ -18,8 +19,7 @@ import (
|
||||
|
||||
var (
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage. "+
|
||||
"Minimum supported interval is 1 second")
|
||||
"This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url")
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
|
||||
"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
|
||||
)
|
||||
@@ -27,6 +27,9 @@ var (
|
||||
// the maximum number of rows to send per each block.
|
||||
const maxRowsPerBlock = 10000
|
||||
|
||||
// the maximum number of labels to send per each block.
|
||||
const maxLabelsPerBlock = 40000
|
||||
|
||||
type pendingSeries struct {
|
||||
mu sync.Mutex
|
||||
wr writeRequest
|
||||
@@ -35,9 +38,11 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte)) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
ps.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
@@ -85,9 +90,17 @@ type writeRequest struct {
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
// pushBlock is called when the write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
labels []prompbmarshal.Label
|
||||
@@ -96,6 +109,8 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
for i := range wr.tss {
|
||||
@@ -113,18 +128,36 @@ func (wr *writeRequest) reset() {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
sortLabelsIfNeeded(wr.tss)
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
tssDst := wr.tss
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxRowsPerBlock {
|
||||
if len(wr.samples) >= maxRowsPerBlock || len(wr.labels) >= maxLabelsPerBlock {
|
||||
wr.tss = tssDst
|
||||
wr.flush()
|
||||
tssDst = wr.tss
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple flags to metrics before sending them to remote storage")
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://victoriametrics.github.io/vmagent.html#relabeling for details")
|
||||
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
|
||||
@@ -41,7 +41,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||
len(*relabelConfigPaths), len(*remoteWriteURLs))
|
||||
}
|
||||
rcs.perURL = make([][]promrelabel.ParsedRelabelConfig, len(*remoteWriteURLs))
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
// Skip empty relabel config.
|
||||
@@ -57,8 +57,8 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
|
||||
type relabelConfigs struct {
|
||||
global []promrelabel.ParsedRelabelConfig
|
||||
perURL [][]promrelabel.ParsedRelabelConfig
|
||||
global *promrelabel.ParsedConfigs
|
||||
perURL []*promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
@@ -79,8 +79,8 @@ func initLabelsGlobal() {
|
||||
}
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, prcs []promrelabel.ParsedRelabelConfig) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && len(prcs) == 0 {
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 {
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
|
||||
labels = append(labels, *extraLabel)
|
||||
}
|
||||
}
|
||||
labels = promrelabel.ApplyRelabelConfigs(labels, labelsLen, prcs, true)
|
||||
labels = pcs.Apply(labels, labelsLen, true)
|
||||
if len(labels) == labelsLen {
|
||||
// Drop the current time series, since relabeling removed all the labels.
|
||||
continue
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -22,8 +21,9 @@ var (
|
||||
remoteWriteURLs = flagutil.NewArray("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
|
||||
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url flags in order to write data concurrently to multiple remote storage systems")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored")
|
||||
queues = flag.Int("remoteWrite.queues", 4, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
queues = flag.Int("remoteWrite.queues", 4, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
@@ -31,9 +31,13 @@ var (
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
significantFigures = flag.Int("remoteWrite.significantFigures", 0, "The number of significant figures to leave in metric values before writing them to remote storage. "+
|
||||
"See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. "+
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics")
|
||||
significantFigures = flagutil.NewArrayInt("remoteWrite.significantFigures", "The number of significant figures to leave in metric values before writing them "+
|
||||
"to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. "+
|
||||
"This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits")
|
||||
roundDigits = flagutil.NewArrayInt("remoteWrite.roundDigits", "Round metric values to this number of decimal digits after the point before writing them to remote storage. "+
|
||||
"Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+
|
||||
"By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
|
||||
"This option may be used for improving data compression for the stored metrics")
|
||||
)
|
||||
|
||||
var rwctxs []*remoteWriteCtx
|
||||
@@ -137,32 +141,23 @@ func Stop() {
|
||||
//
|
||||
// Note that wr may be modified by Push due to relabeling and rounding.
|
||||
func Push(wr *prompbmarshal.WriteRequest) {
|
||||
if *significantFigures > 0 {
|
||||
// Round values according to significantFigures
|
||||
for i := range wr.Timeseries {
|
||||
samples := wr.Timeseries[i].Samples
|
||||
for j := range samples {
|
||||
s := &samples[j]
|
||||
s.Value = decimal.Round(s.Value, *significantFigures)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
prcsGlobal := rcs.global
|
||||
if len(prcsGlobal) > 0 || len(labelsGlobal) > 0 {
|
||||
pcsGlobal := rcs.global
|
||||
if pcsGlobal.Len() > 0 || len(labelsGlobal) > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
samplesCount := 0
|
||||
labelsCount := 0
|
||||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
labelsCount += len(tss[i].Labels)
|
||||
i++
|
||||
if samplesCount > maxRowsPerBlock {
|
||||
if samplesCount >= maxRowsPerBlock || labelsCount >= maxLabelsPerBlock {
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -175,7 +170,7 @@ func Push(wr *prompbmarshal.WriteRequest) {
|
||||
}
|
||||
if rctx != nil {
|
||||
tssBlockLen := len(tssBlock)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, labelsGlobal, prcsGlobal)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, labelsGlobal, pcsGlobal)
|
||||
globalRelabelMetricsDropped.Add(tssBlockLen - len(tssBlock))
|
||||
}
|
||||
for _, rwctx := range rwctxs {
|
||||
@@ -213,9 +208,17 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int,
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
c := newClient(argIdx, remoteWriteURL, sanitizedURL, fq, *queues)
|
||||
pss := make([]*pendingSeries, *queues)
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pssLen := *queues
|
||||
if n := cgroup.AvailableCPUs(); pssLen > n {
|
||||
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
||||
// since every pendingSeries can saturate up to a single CPU.
|
||||
pssLen = n
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock)
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
}
|
||||
return &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
@@ -233,10 +236,11 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
}
|
||||
rwctx.idx = 0
|
||||
rwctx.pss = nil
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
|
||||
rwctx.relabelMetricsDropped = nil
|
||||
}
|
||||
@@ -245,8 +249,8 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
prcs := rcs.perURL[rwctx.idx]
|
||||
if len(prcs) > 0 {
|
||||
pcs := rcs.perURL[rwctx.idx]
|
||||
if pcs.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
@@ -255,7 +259,7 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
v = tssRelabelPool.Get().(*[]prompbmarshal.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
tssLen := len(tss)
|
||||
tss = rctx.applyRelabeling(tss, nil, prcs)
|
||||
tss = rctx.applyRelabeling(tss, nil, pcs)
|
||||
rwctx.relabelMetricsDropped.Add(tssLen - len(tss))
|
||||
}
|
||||
pss := rwctx.pss
|
||||
|
||||
51
app/vmagent/remotewrite/sort_labels.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
// sortLabels controls whether labels of incoming samples are sorted
// before the samples are written to the configured remote storage systems.
var sortLabels = flag.Bool("sortLabels", false, `Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. `+
	`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
	`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. `+
	`Enabled sorting for labels can slow down ingestion performance a bit`)
|
||||
|
||||
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
|
||||
func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) {
|
||||
if !*sortLabels {
|
||||
return
|
||||
}
|
||||
// The slc is used for avoiding memory allocation when passing labels to sort.Sort.
|
||||
slc := sortLabelsCtxPool.Get().(*sortLabelsCtx)
|
||||
for i := range tss {
|
||||
slc.labels = tss[i].Labels
|
||||
sort.Sort(&slc.labels)
|
||||
}
|
||||
slc.labels = nil
|
||||
sortLabelsCtxPool.Put(slc)
|
||||
}
|
||||
|
||||
type sortLabelsCtx struct {
|
||||
labels sortedLabels
|
||||
}
|
||||
|
||||
var sortLabelsCtxPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &sortLabelsCtx{}
|
||||
},
|
||||
}
|
||||
|
||||
type sortedLabels []prompbmarshal.Label
|
||||
|
||||
func (sl *sortedLabels) Len() int { return len(*sl) }
|
||||
func (sl *sortedLabels) Less(i, j int) bool {
|
||||
a := *sl
|
||||
return a[i].Name < a[j].Name
|
||||
}
|
||||
func (sl *sortedLabels) Swap(i, j int) {
|
||||
a := *sl
|
||||
a[i], a[j] = a[j], a[i]
|
||||
}
|
||||
@@ -1,25 +1,17 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func statDial(network, addr string) (conn net.Conn, err error) {
|
||||
if !strings.HasPrefix(network, "tcp") {
|
||||
return nil, fmt.Errorf("unexpected network passed to statDial: %q; it must start from `tcp`", network)
|
||||
}
|
||||
if netutil.TCP6Enabled() {
|
||||
conn, err = fasthttp.DialDualStack(addr)
|
||||
} else {
|
||||
conn, err = fasthttp.Dial(addr)
|
||||
}
|
||||
func statDial(networkUnused, addr string) (conn net.Conn, err error) {
|
||||
network := netutil.GetTCPNetwork()
|
||||
conn, err = net.DialTimeout(network, addr, 5*time.Second)
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
|
||||
@@ -88,3 +88,9 @@ vmalert-local-with-goarch:
|
||||
|
||||
vmalert-pure:
|
||||
APP_NAME=vmalert $(MAKE) app-local-pure
|
||||
|
||||
vmalert-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vmalert $(MAKE) app-local-windows-with-goarch
|
||||
|
||||
vmalert-windows-amd64-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
## vmalert
|
||||
# vmalert
|
||||
|
||||
`vmalert` executes a list of given [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
|
||||
or [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
rules against configured address.
|
||||
|
||||
### Features:
|
||||
## Features
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
|
||||
* VictoriaMetrics [MetricsQL](https://victoriametrics.github.io/MetricsQL.html)
|
||||
support and expressions validation;
|
||||
@@ -12,10 +12,11 @@ rules against configured address.
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
|
||||
* Keeps the alerts [state on restarts](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmalert#alerts-state-on-restarts);
|
||||
* Graphite datasource can be used for alerting and recording rules. See [these docs](#graphite) for details.
|
||||
* Lightweight without extra dependencies.
|
||||
|
||||
### Limitations:
|
||||
* `vmalert` execute queries against remote datasource which has reliability risks because of network.
|
||||
## Limitations
|
||||
* `vmalert` executes queries against a remote datasource, which has reliability risks because of the network.
|
||||
It is recommended to configure alerts thresholds and rules expressions with understanding that network request
|
||||
may fail;
|
||||
* by default, rules execution is sequential within one group, but persisting of execution results to remote
|
||||
@@ -23,7 +24,7 @@ storage is asynchronous. Hence, user shouldn't rely on recording rules chaining
|
||||
recording rule is reused in next one;
|
||||
* `vmalert` has no UI, just an API for getting groups and rules statuses.
|
||||
|
||||
### QuickStart
|
||||
## QuickStart
|
||||
|
||||
To build `vmalert` from sources:
|
||||
```
|
||||
@@ -36,7 +37,7 @@ The build binary will be placed to `VictoriaMetrics/bin` folder.
|
||||
To start using `vmalert` you will need the following things:
|
||||
* list of rules - PromQL/MetricsQL expressions to execute;
|
||||
* datasource address - reachable VictoriaMetrics instance for rules execution;
|
||||
* notifier address - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
|
||||
* notifier address - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
|
||||
aggregating alerts and sending notifications.
|
||||
* remote write address - [remote write](https://prometheus.io/docs/prometheus/latest/storage/#remote-storage-integrations)
|
||||
compatible storage address for storing recording rules results and alerts state in the form of time series. This is optional.
|
||||
@@ -50,16 +51,15 @@ Then configure `vmalert` accordingly:
|
||||
-remoteWrite.url=http://localhost:8428 \ # remote write compatible storage to persist rules
|
||||
-remoteRead.url=http://localhost:8428 \ # PromQL compatible datasource to restore alerts state from
|
||||
-external.label=cluster=east-1 \ # External label to be applied for each rule
|
||||
-external.label=replica=a \ # Multiple external labels may be set
|
||||
-evaluationInterval=3s # Default evaluation interval if not specified in rules group
|
||||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
in [testdata](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/config/testdata) folder.
|
||||
Every `rule` belongs to `group` and every configuration file may contain arbitrary number of groups:
|
||||
```yaml
|
||||
@@ -67,7 +67,7 @@ groups:
|
||||
[ - <rule_group> ]
|
||||
```
|
||||
|
||||
#### Groups
|
||||
### Groups
|
||||
|
||||
Each group has following attributes:
|
||||
```yaml
|
||||
@@ -78,34 +78,44 @@ name: <string>
|
||||
[ interval: <duration> | default = global.evaluation_interval ]
|
||||
|
||||
# How many rules execute at once. Increasing concurrency may speed
|
||||
# up round execution speed.
|
||||
# up round execution speed.
|
||||
[ concurrency: <integer> | default = 1 ]
|
||||
|
||||
# Optional type for expressions inside the rules. Supported values: "graphite" and "prometheus".
|
||||
# By default "prometheus" rule type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
rules:
|
||||
[ - <rule> ... ]
|
||||
```
|
||||
|
||||
#### Rules
|
||||
### Rules
|
||||
|
||||
There are two types of Rules:
|
||||
* [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) -
|
||||
* [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) -
|
||||
Alerting rules allow defining alert conditions via [MetricsQL](https://victoriametrics.github.io/MetricsQL.html)
|
||||
and to send notifications about firing alerts to [Alertmanager](https://github.com/prometheus/alertmanager).
|
||||
* [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) -
|
||||
Recording rules allow you to precompute frequently needed or computationally expensive expressions
|
||||
* [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) -
|
||||
Recording rules allow you to precompute frequently needed or computationally expensive expressions
|
||||
and save their result as a new set of time series.
|
||||
|
||||
`vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels
|
||||
within one group.
|
||||
within one group.
|
||||
|
||||
##### Alerting rules
|
||||
#### Alerting rules
|
||||
|
||||
The syntax for alerting rule is following:
|
||||
```yaml
|
||||
# The name of the alert. Must be a valid metric name.
|
||||
alert: <string>
|
||||
|
||||
# The MetricsQL expression to evaluate.
|
||||
# Optional type for the rule. Supported values: "graphite", "prometheus".
|
||||
# By default "prometheus" rule type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default MetricsQL expression is used. If type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Alerts are considered firing once they have been returned for this long.
|
||||
@@ -119,16 +129,22 @@ labels:
|
||||
# Annotations to add to each alert.
|
||||
annotations:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
```
|
||||
```
|
||||
|
||||
##### Recording rules
|
||||
#### Recording rules
|
||||
|
||||
The syntax for recording rules is following:
|
||||
```yaml
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The MetricsQL expression to evaluate.
|
||||
# Optional type for the rule. Supported values: "graphite", "prometheus".
|
||||
# By default "prometheus" rule type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default MetricsQL expression is used. If type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite before storing the result.
|
||||
@@ -139,45 +155,58 @@ labels:
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
|
||||
#### Alerts state on restarts
|
||||
### Alerts state on restarts
|
||||
|
||||
`vmalert` has no local storage, so alerts state is stored in the process memory. Hence, after reloading of `vmalert`
|
||||
`vmalert` has no local storage, so alerts state is stored in the process memory. Hence, after reloading of `vmalert`
|
||||
the process alerts state will be lost. To avoid this situation, `vmalert` should be configured via the following flags:
|
||||
* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or VMInsert (Cluster). `vmalert` will persist alerts state
|
||||
into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via remote-write protocol.
|
||||
These are regular time series and may be queried from VM just as any other time series.
|
||||
* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or VMInsert (Cluster). `vmalert` will persist alerts state
|
||||
into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via remote-write protocol.
|
||||
These are regular time series and may be queried from VM just as any other time series.
|
||||
The state is stored to the configured address on every rule evaluation.
|
||||
* `-remoteRead.url` - URL to VictoriaMetrics (Single) or VMSelect (Cluster). `vmalert` will try to restore alerts state
|
||||
* `-remoteRead.url` - URL to VictoriaMetrics (Single) or VMSelect (Cluster). `vmalert` will try to restore alerts state
|
||||
from configured address by querying time series with name `ALERTS_FOR_STATE`.
|
||||
|
||||
Both flags are required for the proper state restoring. Restore process may fail if time series are missing
|
||||
in configured `-remoteRead.url`, weren't updated in the last `1h` or received state doesn't match current `vmalert`
|
||||
in configured `-remoteRead.url`, weren't updated in the last `1h` or received state doesn't match current `vmalert`
|
||||
rules configuration.
|
||||
|
||||
|
||||
#### WEB
|
||||
### WEB
|
||||
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
* `http://<vmalert-addr>/api/v1/groups` - list of all loaded groups and rules;
|
||||
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
|
||||
* `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status` - get alert status by ID.
|
||||
* `http://<vmalert-addr>/api/v1/<groupID>/<alertID>/status` - get alert status by ID.
|
||||
Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/metrics` - application metrics.
|
||||
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
|
||||
|
||||
|
||||
### Configuration
|
||||
## Graphite
|
||||
|
||||
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
|
||||
if the corresponding group or rule contains `type: "graphite"` config option. It is expected that the `<-datasource.url>/render`
|
||||
implements [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) for `format=json`.
|
||||
When using vmalert with both `graphite` and `prometheus` rules configured against cluster version of VM do not forget
|
||||
to set `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on query type.
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
```
|
||||
-datasource.appendTypePrefix
|
||||
Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to VMSelect URL.
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.lookback duration
|
||||
Lookback defines how far to look into past when evaluating queries. For example, if datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
Lookback defines how far to look into past when evaluating queries. For example, if datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
-datasource.maxIdleConnections int
|
||||
Defines the number of idle (keep-alive connections) to configured datasource.Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state. (default 100)
|
||||
Defines the number of idle (keep-alive connections) to configured datasource.Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state. (default 100)
|
||||
-datasource.queryStep duration
|
||||
queryStep defines how far a value can fallback to when evaluating queries. For example, if datasource.queryStep=15s then param "step" with value "15s" will be added to every query.
|
||||
-datasource.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used
|
||||
-datasource.tlsCertFile string
|
||||
@@ -190,6 +219,8 @@ The shortlist of configuration flags is the following:
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
-dryRun -rule
|
||||
Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP is used
|
||||
-envflag.enable
|
||||
@@ -200,12 +231,14 @@ The shortlist of configuration flags is the following:
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
-external.alert.source string
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used
|
||||
-external.label array
|
||||
Optional label in the form 'name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-external.url string
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
@@ -224,26 +257,32 @@ The shortlist of configuration flags is the following:
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-memory.allowedBytes value
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-notifier.basicAuth.password array
|
||||
Optional basic auth password for -datasource.url
|
||||
Optional basic auth password for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.basicAuth.username array
|
||||
Optional basic auth username for -datasource.url
|
||||
Optional basic auth username for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used
|
||||
@@ -251,8 +290,9 @@ The shortlist of configuration flags is the following:
|
||||
-notifier.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsInsecureSkipVerify
|
||||
-notifier.tlsInsecureSkipVerify array
|
||||
Whether to skip tls verification when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
@@ -307,11 +347,11 @@ The shortlist of configuration flags is the following:
|
||||
-remoteWrite.url string
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule array
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
@@ -329,50 +369,50 @@ The shortlist of configuration flags is the following:
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
To reload configuration without `vmalert` restart send SIGHUP signal
|
||||
or send GET request to `/-/reload` endpoint.
|
||||
|
||||
### Contributing
|
||||
## Contributing
|
||||
|
||||
`vmalert` is mostly designed and built by VictoriaMetrics community.
|
||||
Feel free to share your experience and ideas for improving this
|
||||
Feel free to share your experience and ideas for improving this
|
||||
software. Please keep simplicity as the main priority.
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use
|
||||
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
It is recommended to use
|
||||
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
- `vmalert` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmalert` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-prod` from the root folder of the repository.
|
||||
2. Run `make vmalert-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
|
||||
#### ARM build
|
||||
### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert-arm` or `make vmalert-arm64` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmalert-arm` or `make vmalert-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert-arm` or `vmalert-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-arm-prod` or `make vmalert-arm64-prod` from the root folder of the repository.
|
||||
2. Run `make vmalert-arm-prod` or `make vmalert-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert-arm-prod` or `vmalert-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
|
||||
// AlertingRule is basic alert entity
|
||||
type AlertingRule struct {
|
||||
Type datasource.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
@@ -50,6 +51,7 @@ type alertingRuleMetrics struct {
|
||||
|
||||
func newAlertingRule(group *Group, cfg config.Rule) *AlertingRule {
|
||||
ar := &AlertingRule{
|
||||
Type: cfg.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Alert,
|
||||
Expr: cfg.Expr,
|
||||
@@ -120,7 +122,7 @@ func (ar *AlertingRule) ID() uint64 {
|
||||
// Exec executes AlertingRule expression via the given Querier.
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series bool) ([]prompbmarshal.TimeSeries, error) {
|
||||
qMetrics, err := q.Query(ctx, ar.Expr)
|
||||
qMetrics, err := q.Query(ctx, ar.Expr, ar.Type)
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
|
||||
@@ -137,7 +139,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||
}
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query, ar.Type) }
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
@@ -310,6 +312,7 @@ func (ar *AlertingRule) RuleAPI() APIAlertingRule {
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
Type: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Expression: ar.Expr,
|
||||
For: ar.For.String(),
|
||||
@@ -404,7 +407,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
return fmt.Errorf("querier is nil")
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query, ar.Type) }
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
@@ -417,7 +420,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
// remote write protocol which is used for state persistence in vmalert.
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, err := q.Query(ctx, expr)
|
||||
qMetrics, err := q.Query(ctx, expr, ar.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
@@ -21,6 +23,7 @@ import (
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type datasource.Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval time.Duration `yaml:"interval,omitempty"`
|
||||
@@ -44,6 +47,19 @@ func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
|
||||
}
|
||||
// change default value to prometheus datasource.
|
||||
if g.Type.Get() == "" {
|
||||
g.Type.Set(datasource.NewPrometheusType())
|
||||
}
|
||||
// update rules with empty type.
|
||||
for i, r := range g.Rules {
|
||||
if r.Type.Get() == "" {
|
||||
r.Type.Set(g.Type)
|
||||
r.ID = HashRule(r)
|
||||
g.Rules[i] = r
|
||||
}
|
||||
}
|
||||
|
||||
h := md5.New()
|
||||
h.Write(b)
|
||||
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))
|
||||
@@ -58,6 +74,7 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
if len(g.Rules) == 0 {
|
||||
return fmt.Errorf("group %q can't contain no rules", g.Name)
|
||||
}
|
||||
|
||||
uniqueRules := map[uint64]struct{}{}
|
||||
for _, r := range g.Rules {
|
||||
ruleName := r.Record
|
||||
@@ -72,7 +89,13 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if validateExpressions {
|
||||
if _, err := metricsql.Parse(r.Expr); err != nil {
|
||||
// its needed only for tests.
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if r.Type.Get() != "" {
|
||||
exprValidator = r.Type.ValidateExpr
|
||||
}
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
@@ -92,6 +115,7 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
// recording rule or alerting rule.
|
||||
type Rule struct {
|
||||
ID uint64
|
||||
Type datasource.Type `yaml:"type,omitempty"`
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
@@ -169,6 +193,7 @@ func HashRule(r Rule) uint64 {
|
||||
h.Write([]byte("alerting"))
|
||||
h.Write([]byte(r.Alert))
|
||||
}
|
||||
h.Write([]byte(r.Type.Get()))
|
||||
kv := sortMap(r.Labels)
|
||||
for _, i := range kv {
|
||||
h.Write([]byte(i.key))
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
@@ -53,6 +55,10 @@ func TestParseBad(t *testing.T) {
|
||||
[]string{"testdata/dir/rules4-bad.rules"},
|
||||
"either `record` or `alert` must be set",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/rules1-bad.rules"},
|
||||
"bad graphite expr",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
_, err := Parse(tc.path, true, true)
|
||||
@@ -215,6 +221,75 @@ func TestGroup_Validate(t *testing.T) {
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test thanos",
|
||||
Type: datasource.NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "unknown datasource type",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test graphite",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test prometheus",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite inherit",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
For: PromDuration{milliseconds: 10},
|
||||
},
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite prometheus bad expr",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: PromDuration{milliseconds: 10},
|
||||
},
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "invalid rule",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
err := tc.group.Validate(tc.validateAnnotations, tc.validateExpressions)
|
||||
|
||||
13
app/vmalert/config/testdata/dir/rules-update0-good.rules
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
groups:
|
||||
- name: TestUpdateGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
rules:
|
||||
- alert: up
|
||||
expr: up == 0
|
||||
for: 30s
|
||||
- alert: up graphite
|
||||
expr: filterSeries(time('host.1',20),'>','0')
|
||||
for: 30s
|
||||
type: graphite
|
||||
12
app/vmalert/config/testdata/dir/rules-update1-good.rules
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
groups:
|
||||
- name: TestUpdateGroup
|
||||
interval: 30s
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: up
|
||||
expr: filterSeries(time('host.2',20),'>','0')
|
||||
for: 30s
|
||||
- alert: up graphite
|
||||
expr: filterSeries(time('host.1',20),'>','0')
|
||||
for: 30s
|
||||
type: graphite
|
||||
12
app/vmalert/config/testdata/rules1-bad.rules
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
groups:
|
||||
- name: TestGraphiteBadGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500) by instance
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
@@ -17,6 +17,7 @@ groups:
|
||||
(up == 1)
|
||||
labels:
|
||||
job: '{{ $labels.job }}'
|
||||
dynamic: '{{ $x := query "up" | first | value }}{{ if eq 1.0 $x }}one{{ else }}unknown{{ end }}'
|
||||
annotations:
|
||||
description: Job {{ $labels.job }} is up!
|
||||
summary: All instances up {{ range query "up" }}
|
||||
|
||||
30
app/vmalert/config/testdata/rules3-good.rules
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
groups:
|
||||
- name: TestGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- name: TestGroupPromMixed
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: HostDown
|
||||
type: graphite
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.up),'last','=', 0)
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
@@ -1,12 +1,14 @@
|
||||
package datasource
|
||||
|
||||
import "context"
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
// Querier interface wraps Query method which
|
||||
// executes given query and returns list of Metrics
|
||||
// as result
|
||||
type Querier interface {
|
||||
Query(ctx context.Context, query string) ([]Metric, error)
|
||||
Query(ctx context.Context, query string, engine Type) ([]Metric, error)
|
||||
}
|
||||
|
||||
// Metric is the basic entity which should be returned by datasource
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
var (
|
||||
addr = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to VMSelect URL.")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
|
||||
@@ -24,6 +25,8 @@ var (
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, "Lookback defines how far to look into past when evaluating queries. "+
|
||||
"For example, if datasource.lookback=5m then param \"time\" with value now()-5m will be added to every query.")
|
||||
queryStep = flag.Duration("datasource.queryStep", 0, "queryStep defines how far a value can fallback to when evaluating queries. "+
|
||||
"For example, if datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query.")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, "Defines the number of idle (keep-alive connections) to configured datasource."+
|
||||
"Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state.")
|
||||
)
|
||||
@@ -39,5 +42,5 @@ func Init() (Querier, error) {
|
||||
}
|
||||
tr.MaxIdleConns = *maxIdleConnections
|
||||
c := &http.Client{Transport: tr}
|
||||
return NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, *lookBack, c), nil
|
||||
return NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, *lookBack, *queryStep, *appendTypePrefix, c), nil
|
||||
}
|
||||
|
||||
89
app/vmalert/datasource/type.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// Names of the datasource types understood by this package.
const (
	graphiteType   = "graphite"
	prometheusType = "prometheus"
)

// Type represents data source type.
//
// The zero value carries an empty name. Get reports that empty name
// unchanged, while String reports it as the prometheus type.
type Type struct {
	name string
}

// NewPrometheusType returns prometheus datasource type.
func NewPrometheusType() Type {
	return Type{name: prometheusType}
}

// NewGraphiteType returns graphite datasource type.
func NewGraphiteType() Type {
	return Type{name: graphiteType}
}

// NewRawType returns datasource type from raw string
// without validation.
func NewRawType(d string) Type {
	return Type{name: d}
}

// Get returns the stored datasource type name exactly as it was set,
// i.e. an empty string for the zero value.
//
// It uses a value receiver so that it can be called on non-addressable
// values such as the result of NewGraphiteType(); calls through a *Type
// keep working unchanged.
func (t Type) Get() string {
	return t.name
}

// Set changes datasource type to the one carried by d.
func (t *Type) Set(d Type) {
	t.name = d.name
}

// String implements fmt.Stringer. An empty name is reported as the
// prometheus type; any other name is returned as is.
func (t Type) String() string {
	if t.name == "" {
		return prometheusType
	}
	return t.name
}
|
||||
|
||||
// ValidateExpr validates query expression with datasource ql.
|
||||
func (t *Type) ValidateExpr(expr string) error {
|
||||
switch t.name {
|
||||
case graphiteType:
|
||||
if _, err := graphiteql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "", prometheusType:
|
||||
if _, err := metricsql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q", t.name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t *Type) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
switch s {
|
||||
case "":
|
||||
s = prometheusType
|
||||
case graphiteType, prometheusType:
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, prometheusType, graphiteType)
|
||||
}
|
||||
t.name = s
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML implements the yaml.Marshaler interface.
// It emits the stored type name as a plain string. The name is returned
// as is, so an unset (empty) name is emitted empty rather than being
// replaced by a default.
|
||||
func (t Type) MarshalYAML() (interface{}, error) {
|
||||
return t.name, nil
|
||||
}
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -46,39 +45,82 @@ func (r response) metrics() ([]Metric, error) {
|
||||
return ms, nil
|
||||
}
|
||||
|
||||
type graphiteResponse []graphiteResponseTarget
|
||||
|
||||
type graphiteResponseTarget struct {
|
||||
Target string `json:"target"`
|
||||
Tags map[string]string `json:"tags"`
|
||||
DataPoints [][2]float64 `json:"datapoints"`
|
||||
}
|
||||
|
||||
func (r graphiteResponse) metrics() []Metric {
|
||||
var ms []Metric
|
||||
for _, res := range r {
|
||||
if len(res.DataPoints) < 1 {
|
||||
continue
|
||||
}
|
||||
var m Metric
|
||||
// add only last value to the result.
|
||||
last := res.DataPoints[len(res.DataPoints)-1]
|
||||
m.Value = last[0]
|
||||
m.Timestamp = int64(last[1])
|
||||
for k, v := range res.Tags {
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
ms = append(ms, m)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
|
||||
// VMStorage represents vmstorage entity with ability to read and write metrics
|
||||
type VMStorage struct {
|
||||
c *http.Client
|
||||
queryURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
lookBack time.Duration
|
||||
c *http.Client
|
||||
datasourceURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
appendTypePrefix bool
|
||||
lookBack time.Duration
|
||||
queryStep time.Duration
|
||||
}
|
||||
|
||||
const queryPath = "/api/v1/query?query="
|
||||
const queryPath = "/api/v1/query"
|
||||
const graphitePath = "/render"
|
||||
|
||||
const prometheusPrefix = "/prometheus"
|
||||
const graphitePrefix = "/graphite"
|
||||
|
||||
// NewVMStorage is a constructor for VMStorage
|
||||
func NewVMStorage(baseURL, basicAuthUser, basicAuthPass string, lookBack time.Duration, c *http.Client) *VMStorage {
|
||||
func NewVMStorage(baseURL, basicAuthUser, basicAuthPass string, lookBack time.Duration, queryStep time.Duration, appendTypePrefix bool, c *http.Client) *VMStorage {
|
||||
return &VMStorage{
|
||||
c: c,
|
||||
basicAuthUser: basicAuthUser,
|
||||
basicAuthPass: basicAuthPass,
|
||||
queryURL: strings.TrimSuffix(baseURL, "/") + queryPath,
|
||||
lookBack: lookBack,
|
||||
c: c,
|
||||
basicAuthUser: basicAuthUser,
|
||||
basicAuthPass: basicAuthPass,
|
||||
datasourceURL: strings.TrimSuffix(baseURL, "/"),
|
||||
appendTypePrefix: appendTypePrefix,
|
||||
lookBack: lookBack,
|
||||
queryStep: queryStep,
|
||||
}
|
||||
}
|
||||
|
||||
// Query reads metrics from datasource by given query
|
||||
func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
const (
|
||||
statusSuccess, statusError, rtVector = "success", "error", "vector"
|
||||
)
|
||||
q := s.queryURL + url.QueryEscape(query)
|
||||
if s.lookBack > 0 {
|
||||
lookBack := time.Now().Add(-s.lookBack)
|
||||
q += fmt.Sprintf("&time=%d", lookBack.Unix())
|
||||
// Query reads metrics from datasource by given query and type
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, dataSourceType Type) ([]Metric, error) {
|
||||
switch dataSourceType.name {
|
||||
case "", prometheusType:
|
||||
return s.queryDataSource(ctx, query, s.setPrometheusReqParams, parsePrometheusResponse)
|
||||
case graphiteType:
|
||||
return s.queryDataSource(ctx, query, s.setGraphiteReqParams, parseGraphiteResponse)
|
||||
default:
|
||||
return nil, fmt.Errorf("engine not found: %q", dataSourceType)
|
||||
}
|
||||
req, err := http.NewRequest("POST", q, nil)
|
||||
}
|
||||
|
||||
func (s *VMStorage) queryDataSource(
|
||||
ctx context.Context,
|
||||
query string,
|
||||
setReqParams func(r *http.Request, query string),
|
||||
processResponse func(r *http.Request, resp *http.Response,
|
||||
) ([]Metric, error)) ([]Metric, error) {
|
||||
req, err := http.NewRequest("POST", s.datasourceURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -86,6 +128,7 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
if s.basicAuthPass != "" {
|
||||
req.SetBasicAuth(s.basicAuthUser, s.basicAuthPass)
|
||||
}
|
||||
setReqParams(req, query)
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", req.URL, err)
|
||||
@@ -95,9 +138,52 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("datasource returns unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
|
||||
}
|
||||
return processResponse(req, resp)
|
||||
}
|
||||
|
||||
func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
|
||||
if s.appendTypePrefix {
|
||||
r.URL.Path += prometheusPrefix
|
||||
}
|
||||
r.URL.Path += queryPath
|
||||
q := r.URL.Query()
|
||||
q.Set("query", query)
|
||||
if s.lookBack > 0 {
|
||||
lookBack := time.Now().Add(-s.lookBack)
|
||||
q.Set("time", fmt.Sprintf("%d", lookBack.Unix()))
|
||||
}
|
||||
if s.queryStep > 0 {
|
||||
q.Set("step", s.queryStep.String())
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
func (s *VMStorage) setGraphiteReqParams(r *http.Request, query string) {
|
||||
if s.appendTypePrefix {
|
||||
r.URL.Path += graphitePrefix
|
||||
}
|
||||
r.URL.Path += graphitePath
|
||||
q := r.URL.Query()
|
||||
q.Set("format", "json")
|
||||
q.Set("target", query)
|
||||
from := "-5min"
|
||||
if s.lookBack > 0 {
|
||||
lookBack := time.Now().Add(-s.lookBack)
|
||||
from = strconv.FormatInt(lookBack.Unix(), 10)
|
||||
}
|
||||
q.Set("from", from)
|
||||
q.Set("until", "now")
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
const (
|
||||
statusSuccess, statusError, rtVector = "success", "error", "vector"
|
||||
)
|
||||
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
r := &response{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing metrics for %s: %w", req.URL, err)
|
||||
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL, err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL, r.ErrorType, r.Error)
|
||||
@@ -110,3 +196,11 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
}
|
||||
return r.metrics()
|
||||
}
|
||||
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
r := &graphiteResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL, err)
|
||||
}
|
||||
return r.metrics(), nil
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ var (
|
||||
basicAuthName = "foo"
|
||||
basicAuthPass = "bar"
|
||||
query = "vm_rows"
|
||||
queryRender = "constantLine(10)"
|
||||
)
|
||||
|
||||
func TestVMSelectQuery(t *testing.T) {
|
||||
@@ -22,6 +23,13 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 7:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
@@ -61,26 +69,26 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
am := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, time.Minute, srv.Client())
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
am := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, time.Minute, 0, false, srv.Client())
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected connection error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected invalid response status error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected response body error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected error status got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected unknown status got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected non-vector resultType error got nil")
|
||||
}
|
||||
m, err := am.Query(ctx, query)
|
||||
m, err := am.Query(ctx, query, NewPrometheusType())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -98,4 +106,22 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
m[0].Labels[0].Name != expected.Labels[0].Name {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
m, err = am.Query(ctx, queryRender, NewGraphiteType())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected = Metric{
|
||||
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
|
||||
Timestamp: 1611758403,
|
||||
Value: 10,
|
||||
}
|
||||
if m[0].Timestamp != expected.Timestamp &&
|
||||
m[0].Value != expected.Value &&
|
||||
m[0].Labels[0].Value != expected.Labels[0].Value &&
|
||||
m[0].Labels[0].Name != expected.Labels[0].Name {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ type Group struct {
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type datasource.Type
|
||||
Interval time.Duration
|
||||
Concurrency int
|
||||
Checksum string
|
||||
@@ -50,6 +51,7 @@ func newGroupMetrics(name, file string) *groupMetrics {
|
||||
|
||||
func newGroup(cfg config.Group, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
g := &Group{
|
||||
Type: cfg.Type,
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval,
|
||||
@@ -99,6 +101,7 @@ func (g *Group) ID() uint64 {
|
||||
hash.Write([]byte(g.File))
|
||||
hash.Write([]byte("\xff"))
|
||||
hash.Write([]byte(g.Name))
|
||||
hash.Write([]byte(g.Type.Get()))
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
@@ -157,6 +160,7 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
for _, nr := range rulesRegistry {
|
||||
newRules = append(newRules, nr)
|
||||
}
|
||||
g.Type = newGroup.Type
|
||||
g.Concurrency = newGroup.Concurrency
|
||||
g.Checksum = newGroup.Checksum
|
||||
g.Rules = newRules
|
||||
|
||||
@@ -38,7 +38,7 @@ func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ datasource.Type) ([]datasource.Metric, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
|
||||
@@ -26,11 +26,11 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
|
||||
@@ -41,7 +41,7 @@ Rule files may contain %{ENV_VAR} placeholders, which are substituted by the cor
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
|
||||
externalLabels = flagutil.NewArray("external.label", "Optional label in the form 'name=value' to add to all generated recording rules and alerts. "+
|
||||
"Pass multiple -label flags in order to add multiple label sets.")
|
||||
|
||||
|
||||
@@ -142,6 +142,7 @@ func (g *Group) toAPI() APIGroup {
|
||||
// encode as string to avoid rounding
|
||||
ID: fmt.Sprintf("%d", g.ID()),
|
||||
Name: g.Name,
|
||||
Type: g.Type.String(),
|
||||
File: g.File,
|
||||
Interval: g.Interval.String(),
|
||||
Concurrency: g.Concurrency,
|
||||
|
||||
@@ -9,6 +9,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
@@ -106,6 +108,18 @@ func TestManagerUpdate(t *testing.T) {
|
||||
Name: "ExampleAlertAlwaysFiring",
|
||||
Expr: "sum by(job) (up == 1)",
|
||||
}
|
||||
ExampleAlertGraphite = &AlertingRule{
|
||||
Name: "up graphite",
|
||||
Expr: "filterSeries(time('host.1',20),'>','0')",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
For: defaultEvalInterval,
|
||||
}
|
||||
ExampleAlertGraphite2 = &AlertingRule{
|
||||
Name: "up",
|
||||
Expr: "filterSeries(time('host.2',20),'>','0')",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
For: defaultEvalInterval,
|
||||
}
|
||||
)
|
||||
|
||||
testCases := []struct {
|
||||
@@ -122,6 +136,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/dir/rules1-good.rules",
|
||||
Name: "duplicatedGroupDiffFiles",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{
|
||||
&AlertingRule{
|
||||
@@ -146,12 +161,14 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{VMRows},
|
||||
Interval: defaultEvalInterval,
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
@@ -166,13 +183,16 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{VMRows},
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Name: "TestGroup",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
}},
|
||||
@@ -186,12 +206,14 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{VMRows},
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
@@ -199,6 +221,23 @@ func TestManagerUpdate(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update prometheus to graphite type",
|
||||
initPath: "config/testdata/dir/rules-update0-good.rules",
|
||||
updatePath: "config/testdata/dir/rules-update1-good.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "config/testdata/dir/rules-update1-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Name: "TestUpdateGroup",
|
||||
Rules: []Rule{
|
||||
ExampleAlertGraphite2,
|
||||
ExampleAlertGraphite,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
|
||||
12
app/vmalert/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
# Two-stage build: the first stage only provides CA certificates, the
# second stage is the image that actually runs vmalert.
|
||||
# Image used for the certificate stage (must provide `apk`, see RUN below).
ARG certs_image
|
||||
# Base image of the final stage.
ARG root_image
|
||||
# Stage "certs": installs the ca-certificates package.
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
# Final stage.
FROM $root_image
|
||||
# Take the CA bundle from the "certs" stage.
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8880
|
||||
ENTRYPOINT ["/vmalert-prod"]
|
||||
# NOTE(review): TARGETARCH is presumably supplied by the builder for
# multi-arch builds (e.g. BuildKit's automatic platform argument) - confirm.
ARG TARGETARCH
|
||||
# Copy the prebuilt binary for the target architecture to the path used by
# ENTRYPOINT; assumes the working directory of the root image is "/" - TODO confirm.
COPY vmalert-${TARGETARCH}-prod ./vmalert-prod
|
||||
@@ -1,7 +1,6 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
@@ -11,10 +10,10 @@ import (
|
||||
|
||||
var (
|
||||
addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -datasource.url")
|
||||
basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("notifier.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -notifier.url")
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("notifier.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to -notifier.url")
|
||||
tlsCertFile = flagutil.NewArray("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArray("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArray("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
@@ -33,7 +32,7 @@ func Init(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
for i, addr := range *addrs {
|
||||
cert, key := tlsCertFile.GetOptionalArg(i), tlsKeyFile.GetOptionalArg(i)
|
||||
ca, serverName := tlsCAFile.GetOptionalArg(i), tlsServerName.GetOptionalArg(i)
|
||||
tr, err := utils.Transport(addr, cert, key, ca, serverName, *tlsInsecureSkipVerify)
|
||||
tr, err := utils.Transport(addr, cert, key, ca, serverName, tlsInsecureSkipVerify.GetOptionalArg(i))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
@@ -167,6 +167,10 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||
"queryEscape": func(q string) string {
|
||||
return url.QueryEscape(q)
|
||||
},
|
||||
"crlfEscape": func(q string) string {
|
||||
q = strings.Replace(q, "\n", `\n`, -1)
|
||||
return strings.Replace(q, "\r", `\r`, -1)
|
||||
},
|
||||
"quotesEscape": func(q string) string {
|
||||
return strings.Replace(q, `"`, `\"`, -1)
|
||||
},
|
||||
@@ -174,7 +178,9 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||
// it is present here only for validation purposes, when there is no
|
||||
// provided datasource.
|
||||
"query": func(q string) ([]datasource.Metric, error) {
|
||||
return nil, nil
|
||||
// return non-empty slice to pass validation with chained functions in template
|
||||
// see issue #989 for details
|
||||
return []datasource.Metric{{}}, nil
|
||||
},
|
||||
"first": func(metrics []datasource.Metric) (datasource.Metric, error) {
|
||||
if len(metrics) > 0 {
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
// to evaluate configured Expression and
|
||||
// return TimeSeries as result.
|
||||
type RecordingRule struct {
|
||||
Type datasource.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
@@ -53,6 +54,7 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
|
||||
func newRecordingRule(group *Group, cfg config.Rule) *RecordingRule {
|
||||
rr := &RecordingRule{
|
||||
Type: cfg.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
@@ -60,6 +62,7 @@ func newRecordingRule(group *Group, cfg config.Rule) *RecordingRule {
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = getOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
@@ -84,8 +87,7 @@ func (rr *RecordingRule) Exec(ctx context.Context, q datasource.Querier, series
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
qMetrics, err := q.Query(ctx, rr.Expr)
|
||||
|
||||
qMetrics, err := q.Query(ctx, rr.Expr, rr.Type)
|
||||
rr.mu.Lock()
|
||||
defer rr.mu.Unlock()
|
||||
|
||||
@@ -162,6 +164,7 @@ func (rr *RecordingRule) RuleAPI() APIRecordingRule {
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||
Name: rr.Name,
|
||||
Type: rr.Type.String(),
|
||||
Expression: rr.Expr,
|
||||
LastError: lastErr,
|
||||
LastExec: rr.lastExecTime,
|
||||
|
||||
@@ -35,5 +35,5 @@ func Init() (datasource.Querier, error) {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
c := &http.Client{Transport: tr}
|
||||
return datasource.NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, 0, c), nil
|
||||
return datasource.NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, 0, 0, false, c), nil
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
Timeout: cfg.WriteTimeout,
|
||||
Transport: cfg.Transport,
|
||||
},
|
||||
addr: strings.TrimSuffix(cfg.Addr, "/") + writePath,
|
||||
addr: strings.TrimSuffix(cfg.Addr, "/"),
|
||||
baUser: cfg.BasicAuthUser,
|
||||
baPass: cfg.BasicAuthPass,
|
||||
flushInterval: cfg.FlushInterval,
|
||||
@@ -231,6 +231,7 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
if c.baPass != "" {
|
||||
req.SetBasicAuth(c.baUser, c.baPass)
|
||||
}
|
||||
req.URL.Path += writePath
|
||||
resp, err := c.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
||||
|
||||
@@ -29,6 +29,9 @@ var pathList = [][]string{
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
switch r.URL.Path {
|
||||
case "/":
|
||||
if r.Method != "GET" {
|
||||
return false
|
||||
}
|
||||
for _, path := range pathList {
|
||||
p, doc := path[0], path[1]
|
||||
fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
|
||||
|
||||
@@ -21,6 +21,7 @@ type APIAlert struct {
|
||||
// APIGroup represents Group for WEB view
|
||||
type APIGroup struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
ID string `json:"id"`
|
||||
File string `json:"file"`
|
||||
Interval string `json:"interval"`
|
||||
@@ -33,6 +34,7 @@ type APIGroup struct {
|
||||
type APIAlertingRule struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
GroupID string `json:"group_id"`
|
||||
Expression string `json:"expression"`
|
||||
For string `json:"for"`
|
||||
@@ -46,6 +48,7 @@ type APIAlertingRule struct {
|
||||
type APIRecordingRule struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
GroupID string `json:"group_id"`
|
||||
Expression string `json:"expression"`
|
||||
LastError string `json:"last_error"`
|
||||
|
||||
@@ -77,3 +77,9 @@ vmauth-local-with-goarch:
|
||||
|
||||
vmauth-pure:
|
||||
APP_NAME=vmauth $(MAKE) app-local-pure
|
||||
|
||||
vmauth-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vmauth $(MAKE) app-local-windows-with-goarch
|
||||
|
||||
vmauth-windows-amd64-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
## vmauth
|
||||
# vmauth
|
||||
|
||||
`vmauth` is a simple auth proxy and router for [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It reads username and password from [Basic Auth headers](https://en.wikipedia.org/wiki/Basic_access_authentication),
|
||||
matches them against configs pointed by `-auth.config` command-line flag and proxies incoming HTTP requests to the configured per-user `url_prefix` on successful match.
|
||||
|
||||
|
||||
### Quick start
|
||||
## Quick start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
|
||||
and pass the following flag to `vmauth` binary in order to start authorizing and routing requests:
|
||||
@@ -23,10 +23,11 @@ Docker images for `vmauth` are available [here](https://hub.docker.com/r/victori
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
Feel free [contacting us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML, accounting, limits, etc.
|
||||
Feel free to [contact us](mailto:info@victoriametrics.com) if you need a customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML,
|
||||
accounting and rate limiting such as [vmgateway](https://victoriametrics.github.io/vmgateway.html).
|
||||
|
||||
|
||||
### Auth config
|
||||
## Auth config
|
||||
|
||||
Auth config is represented in the following simple `yml` format:
|
||||
|
||||
@@ -36,11 +37,15 @@ Auth config is represented in the following simple `yml` format:
|
||||
# Usernames must be unique.
|
||||
|
||||
users:
|
||||
# Requests with the 'Authorization: Bearer XXXX' header are proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
- bearer_token: "XXXX"
|
||||
url_prefix: "http://localhost:8428"
|
||||
|
||||
# The user for querying local single-node VictoriaMetrics.
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query
|
||||
# will be proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
- username: "local-single-node"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428"
|
||||
@@ -48,27 +53,40 @@ users:
|
||||
# The user for querying account 123 in VictoriaMetrics cluster
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vmselect:8481/select/123/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://vmselect:8481/select/123/prometheus/api/v1/select
|
||||
# will be proxied to http://vmselect:8481/select/123/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect:8481/select/123/prometheus/api/v1/query
|
||||
- username: "cluster-select-account-123"
|
||||
password: "***"
|
||||
url_prefix: "http://vmselect:8481/select/123/prometheus"
|
||||
|
||||
# The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the reuqests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vminsert:8480/insert/42/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/write is routed to http://vminsert:8480/insert/42/prometheus/api/v1/write
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be proxied to http://vminsert:8480/insert/42/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/write is proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write
|
||||
- username: "cluster-insert-account-42"
|
||||
password: "***"
|
||||
url_prefix: "http://vminsert:8480/insert/42/prometheus"
|
||||
|
||||
|
||||
# A single user for querying and inserting data:
|
||||
# - Requests to http://vmauth:8427/api/v1/query, http://vmauth:8427/api/v1/query_range
|
||||
# and http://vmauth:8427/api/v1/label/<label_name>/values are proxied to http://vmselect:8481/select/42/prometheus.
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect:8481/select/42/prometheus/api/v1/query
|
||||
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths: ["/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^/]+/values"]
|
||||
url_prefix: "http://vmselect:8481/select/42/prometheus"
|
||||
- src_paths: ["/api/v1/write"]
|
||||
url_prefix: "http://vminsert:8480/insert/42/prometheus"
|
||||
```
|
||||
|
||||
The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
This may be useful for passing secrets to the config.
|
||||
|
||||
|
||||
### Security
|
||||
## Security
|
||||
|
||||
Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable https. This can be done by passing the following `-tls*` command-line flags to `vmauth`:
|
||||
|
||||
@@ -84,30 +102,30 @@ Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable
|
||||
Alternatively, [https termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.
|
||||
|
||||
|
||||
### Monitoring
|
||||
## Monitoring
|
||||
|
||||
`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended to set up regular scraping of this page
|
||||
either via [vmagent](https://victoriametrics.github.io/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmauth` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmauth` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmauth` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmauth` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmauth-prod` from the root folder of the repository.
|
||||
2. Run `make vmauth-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmauth-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
@@ -121,7 +139,7 @@ ROOT_IMAGE=scratch make package-vmauth
|
||||
```
|
||||
|
||||
|
||||
### Profiling
|
||||
## Profiling
|
||||
|
||||
`vmauth` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
@@ -142,7 +160,7 @@ The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
|
||||
|
||||
### Advanced usage
|
||||
## Advanced usage
|
||||
|
||||
Pass `-help` command-line arg to `vmauth` in order to see all the configuration options:
|
||||
|
||||
@@ -161,6 +179,8 @@ See the docs at https://victoriametrics.github.io/vmauth.html .
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
@@ -179,17 +199,23 @@ See the docs at https://victoriametrics.github.io/vmauth.html .
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8427")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-memory.allowedBytes value
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -28,13 +30,60 @@ type AuthConfig struct {
|
||||
|
||||
// UserInfo is user information read from authConfigPath
|
||||
type UserInfo struct {
|
||||
Username string `yaml:"username"`
|
||||
Password string `yaml:"password"`
|
||||
URLPrefix string `yaml:"url_prefix"`
|
||||
BearerToken string `yaml:"bearer_token"`
|
||||
Username string `yaml:"username"`
|
||||
Password string `yaml:"password"`
|
||||
URLPrefix string `yaml:"url_prefix"`
|
||||
URLMap []URLMap `yaml:"url_map"`
|
||||
|
||||
requests *metrics.Counter
|
||||
}
|
||||
|
||||
// URLMap is a mapping from source paths to target urls.
//
// Each entry pairs a set of request path patterns with the url prefix
// that createTargetURL uses for requests matching one of these patterns.
|
||||
type URLMap struct {
|
||||
// SrcPaths lists the request path patterns served by this entry.
SrcPaths []*SrcPath `yaml:"src_paths"`
|
||||
// URLPrefix is put in front of the request URI when one of SrcPaths matches the request path.
URLPrefix string `yaml:"url_prefix"`
|
||||
}
|
||||
|
||||
// SrcPath is a single `src_paths` entry.
//
// It keeps the pattern exactly as it was written in the config next to
// the fully anchored regular expression compiled from it.
type SrcPath struct {
	sOriginal string
	re        *regexp.Regexp
}

// match reports whether the request path s is covered by sp.
func (sp *SrcPath) match(s string) bool {
	literal, complete := sp.re.LiteralPrefix()
	switch {
	case complete:
		// The whole pattern is a plain string, so a string comparison is enough.
		return literal == s
	case strings.HasPrefix(s, literal):
		return sp.re.MatchString(s)
	default:
		// s doesn't start with the literal part every match must begin with.
		return false
	}
}

// UnmarshalYAML implements yaml.Unmarshaler
//
// The yaml value is read as a string and compiled into a regexp,
// which must match the whole path.
func (sp *SrcPath) UnmarshalYAML(f func(interface{}) error) error {
	var pattern string
	err := f(&pattern)
	if err != nil {
		return err
	}
	// Anchor the pattern at both ends, so it cannot match a part of the path only.
	compiled, err := regexp.Compile("^(?:" + pattern + ")$")
	if err != nil {
		return fmt.Errorf("cannot build regexp from %q: %w", pattern, err)
	}
	sp.sOriginal, sp.re = pattern, compiled
	return nil
}

// MarshalYAML implements yaml.Marshaler.
//
// It returns the pattern in the form it had before anchoring.
func (sp *SrcPath) MarshalYAML() (interface{}, error) {
	original := sp.sOriginal
	return original, nil
}
|
||||
|
||||
func initAuthConfig() {
|
||||
if len(*authConfigPath) == 0 {
|
||||
logger.Fatalf("missing required `-auth.config` command-line flag")
|
||||
@@ -103,29 +152,87 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
if len(uis) == 0 {
|
||||
return nil, fmt.Errorf("`users` section cannot be empty in AuthConfig")
|
||||
}
|
||||
m := make(map[string]*UserInfo, len(uis))
|
||||
byAuthToken := make(map[string]*UserInfo, len(uis))
|
||||
byUsername := make(map[string]bool, len(uis))
|
||||
byBearerToken := make(map[string]bool, len(uis))
|
||||
for i := range uis {
|
||||
ui := &uis[i]
|
||||
if m[ui.Username] != nil {
|
||||
if ui.BearerToken == "" && ui.Username == "" {
|
||||
return nil, fmt.Errorf("either bearer_token or username must be set")
|
||||
}
|
||||
if ui.BearerToken != "" && ui.Username != "" {
|
||||
return nil, fmt.Errorf("bearer_token=%q and username=%q cannot be set simultaneously", ui.BearerToken, ui.Username)
|
||||
}
|
||||
if byBearerToken[ui.BearerToken] {
|
||||
return nil, fmt.Errorf("duplicate bearer_token found; bearer_token: %q", ui.BearerToken)
|
||||
}
|
||||
if byUsername[ui.Username] {
|
||||
return nil, fmt.Errorf("duplicate username found; username: %q", ui.Username)
|
||||
}
|
||||
urlPrefix := ui.URLPrefix
|
||||
// Remove trailing '/' from urlPrefix
|
||||
for strings.HasSuffix(urlPrefix, "/") {
|
||||
urlPrefix = urlPrefix[:len(urlPrefix)-1]
|
||||
authToken := getAuthToken(ui.BearerToken, ui.Username, ui.Password)
|
||||
if byAuthToken[authToken] != nil {
|
||||
return nil, fmt.Errorf("duplicate auth token found for bearer_token=%q, username=%q: %q", authToken, ui.BearerToken, ui.Username)
|
||||
}
|
||||
// Validate urlPrefix
|
||||
target, err := url.Parse(urlPrefix)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid `url_prefix: %q`: %w", urlPrefix, err)
|
||||
if len(ui.URLPrefix) > 0 {
|
||||
urlPrefix, err := sanitizeURLPrefix(ui.URLPrefix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ui.URLPrefix = urlPrefix
|
||||
}
|
||||
if target.Scheme != "http" && target.Scheme != "https" {
|
||||
return nil, fmt.Errorf("unsupported scheme for `url_prefix: %q`: %q; must be `http` or `https`", urlPrefix, target.Scheme)
|
||||
for _, e := range ui.URLMap {
|
||||
if len(e.SrcPaths) == 0 {
|
||||
return nil, fmt.Errorf("missing `src_paths`")
|
||||
}
|
||||
urlPrefix, err := sanitizeURLPrefix(e.URLPrefix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e.URLPrefix = urlPrefix
|
||||
}
|
||||
|
||||
ui.URLPrefix = urlPrefix
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, ui.Username))
|
||||
m[ui.Username] = ui
|
||||
if len(ui.URLMap) == 0 && len(ui.URLPrefix) == 0 {
|
||||
return nil, fmt.Errorf("missing `url_prefix`")
|
||||
}
|
||||
if ui.BearerToken != "" {
|
||||
if ui.Password != "" {
|
||||
return nil, fmt.Errorf("password shouldn't be set for bearer_token %q", ui.BearerToken)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_user_requests_total{username="bearer_token"}`)
|
||||
byBearerToken[ui.BearerToken] = true
|
||||
}
|
||||
if ui.Username != "" {
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, ui.Username))
|
||||
byUsername[ui.Username] = true
|
||||
}
|
||||
byAuthToken[authToken] = ui
|
||||
}
|
||||
return m, nil
|
||||
return byAuthToken, nil
|
||||
}
|
||||
|
||||
// getAuthToken returns the `Authorization` header value for the given credentials.
//
// A non-empty bearerToken gives `Bearer <token>`; username and password are ignored then.
// Otherwise the result is `Basic <base64(username:password)>`.
func getAuthToken(bearerToken, username, password string) string {
	if bearerToken == "" {
		credentials := []byte(username + ":" + password)
		return "Basic " + base64.StdEncoding.EncodeToString(credentials)
	}
	return "Bearer " + bearerToken
}
|
||||
|
||||
// sanitizeURLPrefix normalizes and validates a `url_prefix` value.
//
// All the trailing '/' chars are removed from urlPrefix. The result must be
// parseable as url, must have `http` or `https` scheme and a non-empty host.
//
// It returns the sanitized prefix on success. On failure it returns
// an empty string and an error describing the problem.
func sanitizeURLPrefix(urlPrefix string) (string, error) {
	// Remove trailing '/' from urlPrefix
	urlPrefix = strings.TrimRight(urlPrefix, "/")

	// Validate urlPrefix
	target, err := url.Parse(urlPrefix)
	if err != nil {
		return "", fmt.Errorf("invalid `url_prefix: %q`: %w", urlPrefix, err)
	}
	if target.Scheme != "http" && target.Scheme != "https" {
		return "", fmt.Errorf("unsupported scheme for `url_prefix: %q`: %q; must be `http` or `https`", urlPrefix, target.Scheme)
	}
	if target.Host == "" {
		// Use the same `url_prefix: %q` form as the errors above.
		return "", fmt.Errorf("missing hostname in `url_prefix: %q`", urlPrefix)
	}
	return urlPrefix, nil
}
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
func TestParseAuthConfigFailure(t *testing.T) {
|
||||
@@ -46,6 +50,27 @@ users:
|
||||
- username: foo
|
||||
url_prefix: //bar
|
||||
`)
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
url_prefix: http:///bar
|
||||
`)
|
||||
|
||||
// Username and bearer_token in a single config
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
bearer_token: bbb
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
// Bearer_token and password in a single config
|
||||
f(`
|
||||
users:
|
||||
- password: foo
|
||||
bearer_token: bbb
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
// Duplicate users
|
||||
f(`
|
||||
@@ -57,6 +82,42 @@ users:
|
||||
- username: foo
|
||||
url_prefix: https://sss.sss
|
||||
`)
|
||||
|
||||
// Duplicate bearer_tokens
|
||||
f(`
|
||||
users:
|
||||
- bearer_token: foo
|
||||
url_prefix: http://foo.bar
|
||||
- username: bar
|
||||
url_prefix: http://xxx.yyy
|
||||
- bearer_token: foo
|
||||
url_prefix: https://sss.sss
|
||||
`)
|
||||
|
||||
// Missing url_prefix in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_paths: ["/foo/bar"]
|
||||
`)
|
||||
|
||||
// Missing src_paths in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- url_prefix: http://foobar
|
||||
`)
|
||||
|
||||
// Invalid regexp in src_path.
|
||||
f(`
|
||||
users:
|
||||
- username: a
|
||||
url_map:
|
||||
- src_paths: ['fo[obar']
|
||||
url_prefix: http://foobar
|
||||
`)
|
||||
}
|
||||
|
||||
func TestParseAuthConfigSuccess(t *testing.T) {
|
||||
@@ -67,8 +128,8 @@ func TestParseAuthConfigSuccess(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
removeMetrics(m)
|
||||
if !reflect.DeepEqual(m, expectedAuthConfig) {
|
||||
t.Fatalf("unexpected auth config\ngot\n%v\nwant\n%v", m, expectedAuthConfig)
|
||||
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +140,7 @@ users:
|
||||
password: bar
|
||||
url_prefix: http://aaa:343/bbb
|
||||
`, map[string]*UserInfo{
|
||||
"foo": {
|
||||
getAuthToken("", "foo", "bar"): {
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: "http://aaa:343/bbb",
|
||||
@@ -94,15 +155,51 @@ users:
|
||||
- username: bar
|
||||
url_prefix: https://bar/x///
|
||||
`, map[string]*UserInfo{
|
||||
"foo": {
|
||||
getAuthToken("", "foo", ""): {
|
||||
Username: "foo",
|
||||
URLPrefix: "http://foo",
|
||||
},
|
||||
"bar": {
|
||||
getAuthToken("", "bar", ""): {
|
||||
Username: "bar",
|
||||
URLPrefix: "https://bar/x",
|
||||
},
|
||||
})
|
||||
|
||||
// non-empty URLMap
|
||||
f(`
|
||||
users:
|
||||
- bearer_token: foo
|
||||
url_map:
|
||||
- src_paths: ["/api/v1/query","/api/v1/query_range","/api/v1/label/[^./]+/.+"]
|
||||
url_prefix: http://vmselect/select/0/prometheus
|
||||
- src_paths: ["/api/v1/write"]
|
||||
url_prefix: http://vminsert/insert/0/prometheus
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("foo", "", ""): {
|
||||
BearerToken: "foo",
|
||||
URLMap: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: "http://vmselect/select/0/prometheus",
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
URLPrefix: "http://vminsert/insert/0/prometheus",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func getSrcPaths(paths []string) []*SrcPath {
|
||||
var sps []*SrcPath
|
||||
for _, path := range paths {
|
||||
sps = append(sps, &SrcPath{
|
||||
sOriginal: path,
|
||||
re: regexp.MustCompile("^(?:" + path + ")$"),
|
||||
})
|
||||
}
|
||||
return sps
|
||||
}
|
||||
|
||||
func removeMetrics(m map[string]*UserInfo) {
|
||||
@@ -110,3 +207,18 @@ func removeMetrics(m map[string]*UserInfo) {
|
||||
info.requests = nil
|
||||
}
|
||||
}
|
||||
|
||||
func areEqualConfigs(a, b map[string]*UserInfo) error {
|
||||
aData, err := yaml.Marshal(a)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot marshal a: %w", err)
|
||||
}
|
||||
bData, err := yaml.Marshal(b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot marshal b: %w", err)
|
||||
}
|
||||
if !bytes.Equal(aData, bData) {
|
||||
return fmt.Errorf("unexpected configs;\ngot\n%s\nwant\n%s", aData, bData)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -47,21 +47,24 @@ func main() {
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
username, password, ok := r.BasicAuth()
|
||||
if !ok {
|
||||
authToken := r.Header.Get("Authorization")
|
||||
if authToken == "" {
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
|
||||
http.Error(w, "missing `Authorization: Basic *` header", http.StatusUnauthorized)
|
||||
http.Error(w, "missing `Authorization` request header", http.StatusUnauthorized)
|
||||
return true
|
||||
}
|
||||
ac := authConfig.Load().(map[string]*UserInfo)
|
||||
info := ac[username]
|
||||
if info == nil || info.Password != password {
|
||||
httpserver.Errorf(w, r, "cannot find the provided username %q or password in config", username)
|
||||
ui := ac[authToken]
|
||||
if ui == nil {
|
||||
httpserver.Errorf(w, r, "cannot find the provided auth token %q in config", authToken)
|
||||
return true
|
||||
}
|
||||
ui.requests.Inc()
|
||||
targetURL, err := createTargetURL(ui, r.URL)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
|
||||
return true
|
||||
}
|
||||
info.requests.Inc()
|
||||
|
||||
targetURL := createTargetURL(info.URLPrefix, r.URL)
|
||||
if _, err := url.Parse(targetURL); err != nil {
|
||||
httpserver.Errorf(w, r, "invalid targetURL=%q: %s", targetURL, err)
|
||||
return true
|
||||
|
||||
12
app/vmauth/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8427
|
||||
ENTRYPOINT ["/vmauth-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmauth-${TARGETARCH}-prod ./vmauth-prod
|
||||
@@ -1,16 +1,31 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func createTargetURL(prefix string, u *url.URL) string {
|
||||
func createTargetURL(ui *UserInfo, uOrig *url.URL) (string, error) {
|
||||
u, err := url.Parse(uOrig.String())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("cannot make a copy of %q: %w", u, err)
|
||||
}
|
||||
// Prevent from attacks with using `..` in r.URL.Path
|
||||
u.Path = path.Clean(u.Path)
|
||||
if !strings.HasPrefix(u.Path, "/") {
|
||||
u.Path = "/" + u.Path
|
||||
}
|
||||
return prefix + u.RequestURI()
|
||||
for _, e := range ui.URLMap {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
return e.URLPrefix + u.RequestURI(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(ui.URLPrefix) > 0 {
|
||||
return ui.URLPrefix + u.RequestURI(), nil
|
||||
}
|
||||
return "", fmt.Errorf("missing route for %q", u)
|
||||
}
|
||||
|
||||
@@ -5,22 +5,102 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCreateTargetURL(t *testing.T) {
|
||||
f := func(prefix, requestURI, expectedTarget string) {
|
||||
func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
f := func(ui *UserInfo, requestURI, expectedTarget string) {
|
||||
t.Helper()
|
||||
u, err := url.Parse(requestURI)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
target := createTargetURL(prefix, u)
|
||||
target, err := createTargetURL(ui, u)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if target != expectedTarget {
|
||||
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
|
||||
}
|
||||
}
|
||||
f("http://foo.bar", "", "http://foo.bar/.")
|
||||
f("http://foo.bar", "/", "http://foo.bar/")
|
||||
f("http://foo.bar", "a/b?c=d", "http://foo.bar/a/b?c=d")
|
||||
f("https://sss:3894/x/y", "/z", "https://sss:3894/x/y/z")
|
||||
f("https://sss:3894/x/y", "/../../aaa", "https://sss:3894/x/y/aaa")
|
||||
f("https://sss:3894/x/y", "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s/../d")
|
||||
// Simple routing with `url_prefix`
|
||||
f(&UserInfo{
|
||||
URLPrefix: "http://foo.bar",
|
||||
}, "", "http://foo.bar/.")
|
||||
f(&UserInfo{
|
||||
URLPrefix: "http://foo.bar",
|
||||
}, "/", "http://foo.bar/")
|
||||
f(&UserInfo{
|
||||
URLPrefix: "http://foo.bar",
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d")
|
||||
f(&UserInfo{
|
||||
URLPrefix: "https://sss:3894/x/y",
|
||||
}, "/z", "https://sss:3894/x/y/z")
|
||||
f(&UserInfo{
|
||||
URLPrefix: "https://sss:3894/x/y",
|
||||
}, "/../../aaa", "https://sss:3894/x/y/aaa")
|
||||
f(&UserInfo{
|
||||
URLPrefix: "https://sss:3894/x/y",
|
||||
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s/../d")
|
||||
|
||||
// Complex routing with `url_map`
|
||||
ui := &UserInfo{
|
||||
URLMap: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
URLPrefix: "http://vmselect/0/prometheus",
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
URLPrefix: "http://vminsert/0/prometheus",
|
||||
},
|
||||
},
|
||||
URLPrefix: "http://default-server",
|
||||
}
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up")
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write")
|
||||
f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range")
|
||||
|
||||
// Complex routing regexp paths in `url_map`
|
||||
ui = &UserInfo{
|
||||
URLMap: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query(_range)?", "/api/v1/label/[^/]+/values"}),
|
||||
URLPrefix: "http://vmselect/0/prometheus",
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
URLPrefix: "http://vminsert/0/prometheus",
|
||||
},
|
||||
},
|
||||
URLPrefix: "http://default-server",
|
||||
}
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up")
|
||||
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up")
|
||||
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values")
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write")
|
||||
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar")
|
||||
}
|
||||
|
||||
func TestCreateTargetURLFailure(t *testing.T) {
|
||||
f := func(ui *UserInfo, requestURI string) {
|
||||
t.Helper()
|
||||
u, err := url.Parse(requestURI)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
target, err := createTargetURL(ui, u)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if target != "" {
|
||||
t.Fatalf("unexpected target=%q; want empty string", target)
|
||||
}
|
||||
}
|
||||
f(&UserInfo{}, "/foo/bar")
|
||||
f(&UserInfo{
|
||||
URLMap: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
URLPrefix: "http://foobar/baz",
|
||||
},
|
||||
},
|
||||
}, "/api/v1/write")
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
## vmbackup
|
||||
# vmbackup
|
||||
|
||||
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
|
||||
@@ -19,13 +19,13 @@ Backed up data can be restored with [vmrestore](https://victoriametrics.github.i
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
See also [vmbackuper](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
|
||||
See also [vmbackupmanager](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
|
||||
### Use cases
|
||||
## Use cases
|
||||
|
||||
#### Regular backups
|
||||
### Regular backups
|
||||
|
||||
Regular backup can be performed with the following command:
|
||||
|
||||
@@ -40,7 +40,7 @@ vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-
|
||||
* `<path/to/new/backup>` is the destination path where new backup will be placed.
|
||||
|
||||
|
||||
#### Regular backups with server-side copy from existing backup
|
||||
### Regular backups with server-side copy from existing backup
|
||||
|
||||
If the destination GCS bucket already contains the previous backup at `-origin` path, then new backup can be sped up
|
||||
with the following command:
|
||||
@@ -52,7 +52,7 @@ vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-
|
||||
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
|
||||
|
||||
|
||||
#### Incremental backups
|
||||
### Incremental backups
|
||||
|
||||
Incremental backups are performed if `-dst` points to an already existing backup. In this case only new data is uploaded to remote storage.
|
||||
It saves time and network bandwidth costs when working with big backups:
|
||||
@@ -62,7 +62,7 @@ vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-
|
||||
```
|
||||
|
||||
|
||||
#### Smart backups
|
||||
### Smart backups
|
||||
|
||||
Smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
|
||||
|
||||
@@ -89,10 +89,10 @@ or from any day (`YYYYMMDD` backups). Note that hourly backup shouldn't run when
|
||||
|
||||
Do not forget to remove old snapshots and backups when they are no longer needed, in order to save storage costs.
|
||||
|
||||
See also [vmbackuper tool](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for automating smart backups.
|
||||
See also [vmbackupmanager tool](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for automating smart backups.
|
||||
|
||||
|
||||
### How does it work?
|
||||
## How does it work?
|
||||
|
||||
The backup algorithm is the following:
|
||||
|
||||
@@ -118,7 +118,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
`vmbackup` can work improperly or slowly when these properties are violated.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
## Troubleshooting
|
||||
|
||||
* If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
|
||||
* If `vmbackup` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
|
||||
@@ -127,7 +127,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
at [cluster VictoriaMetrics](https://victoriametrics.github.io/Cluster-VictoriaMetrics.html) and vice versa.
|
||||
|
||||
|
||||
### Advanced usage
|
||||
## Advanced usage
|
||||
|
||||
|
||||
* Obtaining credentials from a file.
|
||||
@@ -191,20 +191,26 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond value
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-maxBytesPerSecond size
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedBytes value
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-origin string
|
||||
@@ -222,24 +228,24 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
```
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmbackup` from the root folder of the repository.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmbackup` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmbackup-prod` from the root folder of the repository.
|
||||
2. Run `make vmbackup-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmbackup-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
|
||||
11
app/vmbackup/multiarch/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
ENTRYPOINT ["/vmbackup-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmbackup-${TARGETARCH}-prod ./vmbackup-prod
|
||||
144
app/vmbackupmanager/README.md
Normal file
@@ -0,0 +1,144 @@
|
||||
## Victoria Metrics Backup Manager
|
||||
|
||||
This service automates regular backup procedures. It supports the following backup intervals: **hourly**, **daily**, **weekly** and **monthly**. Multiple backup intervals may be configured simultaneously. I.e. the backup manager creates hourly backups every hour, while it creates daily backups every day, etc. Backup manager must have read access to the storage data, so best practice is to install it on the same machine (or as a sidecar) where the storage node is installed.
|
||||
The backup service makes a backup every hour and puts it to the latest folder and then copies data to the folders which represent the backup intervals (hourly, daily, weekly and monthly)
|
||||
|
||||
The required flags for running the service are as follows:
|
||||
|
||||
* -eula - should be true and means that you have the legal right to run a backup manager. That can either be a signed contract or an email with confirmation to run the service in a trial period
|
||||
* -storageDataPath - path to VictoriaMetrics or vmstorage data path to make backup from
|
||||
* -snapshot.createURL - VictoriaMetrics URL for creating snapshots; a snapshot is created automatically via this URL during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
* -dst - backup destination at s3, gcs or local filesystem
|
||||
* -credsFilePath - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set. See [https://cloud.google.com/iam/docs/creating-managing-service-account-keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) and [https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html](https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html)
|
||||
|
||||
|
||||
Backup schedule is controlled by the following flags:
|
||||
|
||||
* -disableHourly - disable hourly run. Default false
|
||||
* -disableDaily - disable daily run. Default false
|
||||
* -disableWeekly - disable weekly run. Default false
|
||||
* -disableMonthly - disable monthly run. Default false
|
||||
|
||||
By default, all backup intervals are enabled and Backup Manager backs up data every hour for every interval (hourly, daily, weekly and monthly).
|
||||
|
||||
|
||||
The backup manager creates the following directory hierarchy at **-dst**:
|
||||
|
||||
* /latest/ - contains the latest backup
|
||||
* /hourly/ - contains hourly backups. Each backup is named as *YYYY-MM-DD:HH*
|
||||
* /daily/ - contains daily backups. Each backup is named as *YYYY-MM-DD*
|
||||
* /weekly/ - contains weekly backups. Each backup is named as *YYYY-WW*
|
||||
* /monthly/ - contains monthly backups. Each backup is named as *YYYY-MM*
|
||||
|
||||
|
||||
To get the full list of supported flags please run the following command:
|
||||
|
||||
```console
|
||||
./vmbackupmanager --help
|
||||
```
|
||||
|
||||
The service creates a **full** backup each run. This means that the system can be restored fully from any particular backup using vmrestore. Backup manager uploads only the data that has been changed or created since the most recent backup (incremental backup).
|
||||
|
||||
*Please take into account that the first backup upload could take a significant amount of time as it needs to upload all of the data.*
|
||||
|
||||
There are two flags which could help with performance tuning:
|
||||
|
||||
* -maxBytesPerSecond - the maximum upload speed. There is no limit if it is set to 0
|
||||
* -concurrency - The number of concurrent workers. Higher concurrency may improve upload speed (default 10)
|
||||
|
||||
|
||||
### Example of Usage
|
||||
|
||||
GCS and cluster version. You need to have a credentials file in json format with following structure
|
||||
|
||||
credentials.json
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "<project>",
|
||||
"private_key_id": "",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "test@<project>.iam.gserviceaccount.com",
|
||||
"client_id": "",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%40<project>.iam.gserviceaccount.com"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Backup manager is launched with the following configuration:
|
||||
|
||||
```console
|
||||
export NODE_IP=192.168.0.10
|
||||
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
|
||||
./vmbackupmanager -dst=gcs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -eula
|
||||
```
|
||||
|
||||
Expected logs in vmbackupmanager:
|
||||
|
||||
```console
|
||||
info lib/backup/actions/backup.go:131 server-side copied 81 out of 81 parts from GCS{bucket: "vmstorage-data", dir: "192.168.0.10//latest/"} to GCS{bucket: "vmstorage-data", dir: "192.168.0.10//weekly/2020-34/"} in 2.549833008s
|
||||
info lib/backup/actions/backup.go:169 backed up 853315 bytes in 2.882 seconds; deleted 0 bytes; server-side copied 853315 bytes; uploaded 0 bytes
|
||||
```
|
||||
|
||||
Expected logs in vmstorage:
|
||||
|
||||
```console
|
||||
info VictoriaMetrics/lib/storage/table.go:146 creating table snapshot of "/vmstorage-data/data"...
|
||||
info VictoriaMetrics/lib/storage/storage.go:311 deleting snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA"...
|
||||
info VictoriaMetrics/lib/storage/storage.go:319 deleted snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA" in 0.169 seconds
|
||||
```
|
||||
|
||||
The result on the GCS bucket
|
||||
|
||||
- The root folder
|
||||
|
||||

|
||||
|
||||
- The latest folder
|
||||
|
||||

|
||||
|
||||
## Backup Retention Policy
|
||||
|
||||
Backup retention policy is controlled by:
|
||||
|
||||
* -keepLastHourly - keep the last N hourly backups. Disabled by default
|
||||
* -keepLastDaily - keep the last N daily backups. Disabled by default
|
||||
* -keepLastWeekly - keep the last N weekly backups. Disabled by default
|
||||
* -keepLastMonthly - keep the last N monthly backups. Disabled by default
|
||||
|
||||
*Note*: a 0 value for any keepLast flag results in deletion of ALL backups of the corresponding type (hourly, daily, weekly or monthly)
|
||||
|
||||
Let’s assume we have a backup manager collecting daily backups for the past 10 days.
|
||||
|
||||

|
||||
|
||||
|
||||
We enable backup retention policy for backup manager by using following configuration:
|
||||
|
||||
```console
|
||||
export NODE_IP=192.168.0.10
|
||||
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
|
||||
./vmbackupmanager -dst=gcs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create \
|
||||
-keepLastDaily=3 -eula
|
||||
```
|
||||
|
||||
Expected logs in backup manager on start:
|
||||
|
||||
```console
|
||||
info lib/logger/flag.go:20 flag "keepLastDaily" = "3"
|
||||
```
|
||||
|
||||
Expected logs in backup manager during retention cycle:
|
||||
|
||||
```console
|
||||
info app/vmbackupmanager/retention.go:106 daily backups to delete [daily/2021-02-13 daily/2021-02-12 daily/2021-02-11 daily/2021-02-10 daily/2021-02-09 daily/2021-02-08 daily/2021-02-07]
|
||||
```
|
||||
|
||||
The result on the GCS bucket. We see only 3 daily backups:
|
||||
|
||||

|
||||
BIN
app/vmbackupmanager/latest.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
app/vmbackupmanager/root.png
Normal file
|
After Width: | Height: | Size: 25 KiB |
BIN
app/vmbackupmanager/rp_daily_1.png
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
app/vmbackupmanager/rp_daily_2.png
Normal file
|
After Width: | Height: | Size: 64 KiB |
79
app/vmctl/Makefile
Normal file
@@ -0,0 +1,79 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
vmctl:
|
||||
APP_NAME=vmctl $(MAKE) app-local
|
||||
|
||||
vmctl-race:
|
||||
APP_NAME=vmctl RACE=-race $(MAKE) app-local
|
||||
|
||||
vmctl-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker
|
||||
|
||||
vmctl-pure-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-pure
|
||||
|
||||
vmctl-amd64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-amd64
|
||||
|
||||
vmctl-arm-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-arm
|
||||
|
||||
vmctl-arm64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-arm64
|
||||
|
||||
vmctl-ppc64le-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-ppc64le
|
||||
|
||||
vmctl-386-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-386
|
||||
|
||||
package-vmctl:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker
|
||||
|
||||
package-vmctl-pure:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-pure
|
||||
|
||||
package-vmctl-amd64:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-amd64
|
||||
|
||||
package-vmctl-arm:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-arm
|
||||
|
||||
package-vmctl-arm64:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-arm64
|
||||
|
||||
package-vmctl-ppc64le:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-ppc64le
|
||||
|
||||
package-vmctl-386:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-386
|
||||
|
||||
publish-vmctl:
|
||||
APP_NAME=vmctl $(MAKE) publish-via-docker
|
||||
|
||||
vmctl-amd64:
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-arm:
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-arm64:
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-ppc64le:
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-386:
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-local-with-goarch:
|
||||
APP_NAME=vmctl $(MAKE) app-local-with-goarch
|
||||
|
||||
vmctl-pure:
|
||||
APP_NAME=vmctl $(MAKE) app-local-pure
|
||||
|
||||
vmctl-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vmctl $(MAKE) app-local-windows-with-goarch
|
||||
|
||||
vmctl-windows-amd64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-windows-amd64
|
||||
448
app/vmctl/README.md
Normal file
@@ -0,0 +1,448 @@
|
||||
# vmctl
|
||||
|
||||
VictoriaMetrics command-line tool
|
||||
|
||||
Features:
|
||||
- [x] Prometheus: migrate data from Prometheus to VictoriaMetrics using snapshot API
|
||||
- [x] Thanos: migrate data from Thanos to VictoriaMetrics
|
||||
- [ ] ~~Prometheus: migrate data from Prometheus to VictoriaMetrics by query~~(discarded)
|
||||
- [x] InfluxDB: migrate data from InfluxDB to VictoriaMetrics
|
||||
- [ ] Storage Management: data re-balancing between nodes
|
||||
|
||||
## Articles
|
||||
|
||||
* [How to migrate data from Prometheus](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
|
||||
* [How to migrate data from Prometheus. Filtering and modifying time series](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-filtering-and-modifying-time-series-6d40cea4bf21)
|
||||
|
||||
## How to build
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmctl` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmctl` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmctl` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmctl-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmctl-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmctl`. It builds `victoriametrics/vmctl:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmctl`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=scratch make package-vmctl
|
||||
```
|
||||
|
||||
### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.15.
|
||||
2. Run `make vmctl-arm` or `make vmctl-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmctl-arm` or `vmctl-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmctl-arm-prod` or `make vmctl-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmctl-arm-prod` or `vmctl-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
## Migrating data from InfluxDB (1.x)
|
||||
|
||||
`vmctl` supports the `influx` mode to migrate data from InfluxDB to VictoriaMetrics time-series database.
|
||||
|
||||
See `./vmctl influx --help` for details and full list of flags.
|
||||
|
||||
To use migration tool please specify the InfluxDB address `--influx-addr`, the database `--influx-database` and VictoriaMetrics address `--vm-addr`.
|
||||
Flag `--vm-addr` for single-node VM is usually equal to `--httpListenAddr`, and for cluster version
|
||||
is equal to `--httpListenAddr` flag of VMInsert component. Please note, that vmctl performs initial readiness check for the given address
|
||||
by checking `/health` endpoint. For cluster version it is additionally required to specify the `--vm-account-id` flag.
|
||||
See more details for cluster version [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
As soon as required flags are provided and all endpoints are accessible, `vmctl` will start the InfluxDB scheme exploration.
|
||||
Basically, it just fetches all fields and timeseries from the provided database and builds up registry of all available timeseries.
|
||||
Then `vmctl` sends fetch requests for each timeseries to InfluxDB one by one and passes the results to VM importer.
|
||||
VM importer then accumulates received samples in batches and sends import requests to VM.
|
||||
|
||||
The importing process example for local installation of InfluxDB(`http://localhost:8086`)
|
||||
and single-node VictoriaMetrics(`http://localhost:8428`):
|
||||
```
|
||||
./vmctl influx --influx-database benchmark
|
||||
InfluxDB import mode
|
||||
2020/01/18 20:47:11 Exploring scheme for database "benchmark"
|
||||
2020/01/18 20:47:11 fetching fields: command: "show field keys"; database: "benchmark"; retention: "autogen"
|
||||
2020/01/18 20:47:11 found 10 fields
|
||||
2020/01/18 20:47:11 fetching series: command: "show series "; database: "benchmark"; retention: "autogen"
|
||||
Found 40000 timeseries to import. Continue? [Y/n] y
|
||||
40000 / 40000 [-----------------------------------------------------------------------------------------------------------------------------------------------] 100.00% 21 p/s
|
||||
2020/01/18 21:19:00 Import finished!
|
||||
2020/01/18 21:19:00 VictoriaMetrics importer stats:
|
||||
idle duration: 13m51.461434876s;
|
||||
time spent while importing: 17m56.923899847s;
|
||||
total samples: 345600000;
|
||||
samples/s: 320914.04;
|
||||
total bytes: 5.9 GB;
|
||||
bytes/s: 5.4 MB;
|
||||
import requests: 40001;
|
||||
2020/01/18 21:19:00 Total time: 31m48.467044016s
|
||||
```
|
||||
|
||||
### Data mapping
|
||||
|
||||
Vmctl maps Influx data the same way as VictoriaMetrics does by using the following rules:
|
||||
|
||||
* `influx-database` arg is mapped into `db` label value unless `db` tag exists in the Influx line.
|
||||
* Field names are mapped to time series names prefixed with {measurement}{separator} value,
|
||||
where {separator} equals to _ by default.
|
||||
It can be changed with `--influx-measurement-field-separator` command-line flag.
|
||||
* Field values are mapped to time series values.
|
||||
* Tags are mapped to Prometheus labels format as-is.
|
||||
|
||||
For example, the following Influx line:
|
||||
```
|
||||
foo,tag1=value1,tag2=value2 field1=12,field2=40
|
||||
```
|
||||
|
||||
is converted into the following Prometheus format data points:
|
||||
```
|
||||
foo_field1{tag1="value1", tag2="value2"} 12
|
||||
foo_field2{tag1="value1", tag2="value2"} 40
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
The configuration flags should contain self-explanatory descriptions.
|
||||
|
||||
### Filtering
|
||||
|
||||
The filtering consists of two parts: timeseries and time.
|
||||
The first step of application is to select all available timeseries
|
||||
for given database and retention. User may specify additional filtering
|
||||
condition via `--influx-filter-series` flag. For example:
|
||||
```
|
||||
./vmctl influx --influx-database benchmark \
|
||||
--influx-filter-series "on benchmark from cpu where hostname='host_1703'"
|
||||
InfluxDB import mode
|
||||
2020/01/26 14:23:29 Exploring scheme for database "benchmark"
|
||||
2020/01/26 14:23:29 fetching fields: command: "show field keys"; database: "benchmark"; retention: "autogen"
|
||||
2020/01/26 14:23:29 found 12 fields
|
||||
2020/01/26 14:23:29 fetching series: command: "show series on benchmark from cpu where hostname='host_1703'"; database: "benchmark"; retention: "autogen"
|
||||
Found 10 timeseries to import. Continue? [Y/n]
|
||||
```
|
||||
The timeseries select query would be the following:
|
||||
`fetching series: command: "show series on benchmark from cpu where hostname='host_1703'"; database: "benchmark"; retention: "autogen"`
|
||||
|
||||
The second step of filtering is a time filter and it applies when fetching the datapoints from Influx.
|
||||
Time filtering may be configured with two flags:
|
||||
* --influx-filter-time-start
|
||||
* --influx-filter-time-end
|
||||
Here's an example of importing timeseries for one day only:
|
||||
`./vmctl influx --influx-database benchmark --influx-filter-series "where hostname='host_1703'" --influx-filter-time-start "2020-01-01T10:07:00Z" --influx-filter-time-end "2020-01-01T15:07:00Z"`
|
||||
|
||||
Please see more about time filtering [here](https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration#filter-meta-queries-by-time).
|
||||
|
||||
## Migrating data from InfluxDB (2.x)
|
||||
|
||||
Migrating data from InfluxDB v2.x is not supported yet ([#32](https://github.com/VictoriaMetrics/vmctl/issues/32)).
|
||||
You may find a 3rd party solution useful for this - https://github.com/jonppe/influx_to_victoriametrics.
|
||||
|
||||
|
||||
## Migrating data from Prometheus
|
||||
|
||||
`vmctl` supports the `prometheus` mode for migrating data from Prometheus to VictoriaMetrics time-series database.
|
||||
Migration is based on reading Prometheus snapshot, which is basically a hard-link to Prometheus data files.
|
||||
|
||||
See `./vmctl prometheus --help` for details and full list of flags.
|
||||
|
||||
To use migration tool please specify the path to Prometheus snapshot `--prom-snapshot` and VictoriaMetrics address `--vm-addr`.
|
||||
More about Prometheus snapshots may be found [here](https://www.robustperception.io/taking-snapshots-of-prometheus-data).
|
||||
Flag `--vm-addr` for single-node VM is usually equal to `--httpListenAddr`, and for cluster version
|
||||
is equal to `--httpListenAddr` flag of VMInsert component. Please note, that vmctl performs initial readiness check for the given address
|
||||
by checking `/health` endpoint. For cluster version it is additionally required to specify the `--vm-account-id` flag.
|
||||
See more details for cluster version [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
As soon as required flags are provided and all endpoints are accessible, `vmctl` will start the Prometheus snapshot exploration.
|
||||
Basically, it just fetches all available blocks in the provided snapshot and reads the metadata. It also does initial filtering by time
|
||||
if flags `--prom-filter-time-start` or `--prom-filter-time-end` were set. The exploration procedure prints some stats from read blocks.
|
||||
Please note that stats are not taking into account timeseries or samples filtering. This will be done during importing process.
|
||||
|
||||
The importing process takes the snapshot blocks revealed from Explore procedure and processes them one by one
|
||||
accumulating timeseries and samples. Please note, that `vmctl` relies on responses from Influx on this stage,
|
||||
so ensure that Explore queries are executed without errors or limits. Please see this
|
||||
[issue](https://github.com/VictoriaMetrics/vmctl/issues/30) for details.
|
||||
The data is processed in chunks and then sent to VM.
|
||||
|
||||
The importing process example for local installation of Prometheus
|
||||
and single-node VictoriaMetrics(`http://localhost:8428`):
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--vm-concurrency=1 \
|
||||
--vm-batch-size=200000 \
|
||||
--prom-concurrency=3
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 14;
|
||||
blocks skipped: 0;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1582409128139 (2020-02-22T22:05:28Z);
|
||||
samples: 32549106;
|
||||
series: 27289.
|
||||
Found 14 blocks to import. Continue? [Y/n] y
|
||||
14 / 14 [-------------------------------------------------------------------------------------------] 100.00% 0 p/s
|
||||
2020/02/23 15:50:03 Import finished!
|
||||
2020/02/23 15:50:03 VictoriaMetrics importer stats:
|
||||
idle duration: 6.152953029s;
|
||||
time spent while importing: 44.908522491s;
|
||||
total samples: 32549106;
|
||||
samples/s: 724786.84;
|
||||
total bytes: 669.1 MB;
|
||||
bytes/s: 14.9 MB;
|
||||
import requests: 323;
|
||||
import requests retries: 0;
|
||||
2020/02/23 15:50:03 Total time: 51.077451066s
|
||||
```
|
||||
|
||||
### Data mapping
|
||||
|
||||
VictoriaMetrics has very similar data model to Prometheus and supports [RemoteWrite integration](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
|
||||
So no data changes will be applied.
|
||||
|
||||
### Configuration
|
||||
|
||||
The configuration flags should contain self-explanatory descriptions.
|
||||
|
||||
### Filtering
|
||||
|
||||
The filtering consists of two parts: by timeseries and time.
|
||||
|
||||
Filtering by time may be configured via flags `--prom-filter-time-start` and `--prom-filter-time-end`
|
||||
in RFC3339 format. This filter is applied twice: to drop blocks out of range and to filter timeseries in blocks with
|
||||
overlapping time range.
|
||||
|
||||
Example of applying time filter:
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--prom-filter-time-start=2020-02-07T00:07:01Z \
|
||||
--prom-filter-time-end=2020-02-11T00:07:01Z
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 2;
|
||||
blocks skipped: 12;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1581328800000 (2020-02-10T10:00:00Z);
|
||||
samples: 1657698;
|
||||
series: 3930.
|
||||
Found 2 blocks to import. Continue? [Y/n] y
|
||||
```
|
||||
|
||||
Please notice, that total amount of blocks in provided snapshot is 14, but only 2 of them were in provided
|
||||
time range. So other 12 blocks were marked as `skipped`. The amount of samples and series is not taken into account,
|
||||
since this is a heavy operation and will be done during import process.
|
||||
|
||||
|
||||
Filtering by timeseries is configured with following flags:
|
||||
* `--prom-filter-label` - the label name, e.g. `__name__` or `instance`;
|
||||
* `--prom-filter-label-value` - the regular expression to filter the label value. By default matches all `.*`
|
||||
|
||||
For example:
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--prom-filter-label="__name__" \
|
||||
--prom-filter-label-value="promhttp.*" \
|
||||
--prom-filter-time-start=2020-02-07T00:07:01Z \
|
||||
--prom-filter-time-end=2020-02-11T00:07:01Z
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 2;
|
||||
blocks skipped: 12;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1581328800000 (2020-02-10T10:00:00Z);
|
||||
samples: 1657698;
|
||||
series: 3930.
|
||||
Found 2 blocks to import. Continue? [Y/n] y
|
||||
14 / 14 [------------------------------------------------------------------------------------------------------------------------------------------------------] 100.00% ? p/s
|
||||
2020/02/23 15:51:07 Import finished!
|
||||
2020/02/23 15:51:07 VictoriaMetrics importer stats:
|
||||
idle duration: 0s;
|
||||
time spent while importing: 37.415461ms;
|
||||
total samples: 10128;
|
||||
samples/s: 270690.24;
|
||||
total bytes: 195.2 kB;
|
||||
bytes/s: 5.2 MB;
|
||||
import requests: 2;
|
||||
import requests retries: 0;
|
||||
2020/02/23 15:51:07 Total time: 7.153158218s
|
||||
```
|
||||
|
||||
## Migrating data from Thanos
|
||||
|
||||
Thanos uses the same storage engine as Prometheus and the data layout on-disk should be the same. That means
|
||||
`vmctl` in mode `prometheus` may be used for Thanos historical data migration as well.
|
||||
These instructions may vary based on the details of your Thanos configuration.
|
||||
Please read carefully and verify as you go. We assume you're using Thanos Sidecar on your Prometheus pods,
|
||||
and that you have a separate Thanos Store installation.
|
||||
|
||||
### Current data
|
||||
|
||||
1. For now, keep your Thanos Sidecar and Thanos-related Prometheus configuration, but add this to also stream
|
||||
metrics to VictoriaMetrics:
|
||||
```
|
||||
remote_write:
|
||||
- url: http://victoria-metrics:8428/api/v1/write
|
||||
```
|
||||
2. Make sure VM is running, of course. Now check the logs to make sure that Prometheus is sending and VM is receiving.
|
||||
In Prometheus, make sure there are no errors. On the VM side, you should see messages like this:
|
||||
```
|
||||
2020-04-27T18:38:46.474Z info VictoriaMetrics/lib/storage/partition.go:207 creating a partition "2020_04" with smallPartsPath="/victoria-metrics-data/data/small/2020_04", bigPartsPath="/victoria-metrics-data/data/big/2020_04"
|
||||
2020-04-27T18:38:46.506Z info VictoriaMetrics/lib/storage/partition.go:222 partition "2020_04" has been created
|
||||
```
|
||||
3. Now just wait. Within two hours, Prometheus should finish its current data file and hand it off to Thanos Store for long term
|
||||
storage.
|
||||
|
||||
### Historical data
|
||||
|
||||
Let's assume your data is stored on S3 served by minio. You first need to copy that out to a local filesystem,
|
||||
then import it into VM using `vmctl` in `prometheus` mode.
|
||||
1. Copy data from minio.
|
||||
1. Run the `minio/mc` Docker container.
|
||||
1. `mc config host add minio http://minio:9000 accessKey secretKey`, substituting appropriate values for the last 3 items.
|
||||
1. `mc cp -r minio/prometheus thanos-data`
|
||||
1. Import using `vmctl`.
|
||||
1. Follow the [instructions](#how-to-build) to compile `vmctl` on your machine.
|
||||
1. Use [prometheus](#migrating-data-from-prometheus) mode to import data:
|
||||
```
|
||||
vmctl prometheus --prom-snapshot thanos-data --vm-addr http://victoria-metrics:8428
|
||||
```
|
||||
|
||||
## Migrating data from VictoriaMetrics
|
||||
|
||||
### Native protocol
|
||||
|
||||
The [native binary protocol](https://victoriametrics.github.io/#how-to-export-data-in-native-format)
|
||||
was introduced in [1.42.0 release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.42.0)
|
||||
and provides the most efficient way to migrate data between VM instances: single to single, cluster to cluster,
|
||||
single to cluster and vice versa. Please note that both instances (source and destination) should be of v1.42.0
|
||||
or higher.
|
||||
|
||||
See `./vmctl vm-native --help` for details and full list of flags.
|
||||
|
||||
In this mode `vmctl` acts as a proxy between two VM instances, where time series filtering is done by "source" (`src`)
|
||||
and processing is done by "destination" (`dst`). Because of that, `vmctl` doesn't actually know how much data will be
|
||||
processed and can't show the progress bar. It will show the current processing speed and total number of processed bytes:
|
||||
|
||||
```
|
||||
./vmctl vm-native --vm-native-src-addr=http://localhost:8528 \
|
||||
--vm-native-dst-addr=http://localhost:8428 \
|
||||
--vm-native-filter-match='{job="vmagent"}' \
|
||||
--vm-native-filter-time-start='2020-01-01T20:07:00Z'
|
||||
VictoriaMetrics Native import mode
|
||||
Initing export pipe from "http://localhost:8528" with filters:
|
||||
filter: match[]={job="vmagent"}
|
||||
Initing import process to "http://localhost:8428":
|
||||
Total: 336.75 KiB ↖ Speed: 454.46 KiB p/s
|
||||
2020/10/13 17:04:59 Total time: 952.143376ms
|
||||
```
|
||||
|
||||
Importing tips:
|
||||
1. Migrating all the metrics from one VM to another may collide with existing application metrics
|
||||
(prefixed with `vm_`) at destination and lead to confusion when using
|
||||
[official Grafana dashboards](https://grafana.com/orgs/victoriametrics/dashboards).
|
||||
To avoid such situation try to filter out VM process metrics via `--vm-native-filter-match` flag.
|
||||
2. Migration is a backfilling process, so it is recommended to read
|
||||
[Backfilling tips](https://github.com/VictoriaMetrics/VictoriaMetrics#backfilling) section.
|
||||
3. `vmctl` doesn't provide relabeling or other types of labels management in this mode.
|
||||
Instead, use [relabeling in VictoriaMetrics](https://github.com/VictoriaMetrics/vmctl/issues/4#issuecomment-683424375).
|
||||
|
||||
|
||||
## Tuning
|
||||
|
||||
### Influx mode
|
||||
|
||||
The flag `--influx-concurrency` controls how many concurrent requests may be sent to InfluxDB while fetching
|
||||
timeseries. Please set it wisely to avoid overwhelming InfluxDB.
|
||||
|
||||
The flag `--influx-chunk-size` controls the max amount of datapoints to return in single chunk from fetch requests.
|
||||
Please see more details [here](https://docs.influxdata.com/influxdb/v1.7/guides/querying_data/#chunking).
|
||||
The chunk size is used to control InfluxDB memory usage, so it won't OOM on processing large timeseries with
|
||||
billions of datapoints.
|
||||
|
||||
### Prometheus mode
|
||||
|
||||
The flag `--prom-concurrency` controls how many concurrent readers will be reading the blocks in snapshot.
|
||||
Since snapshots are just files on disk it would be hard to overwhelm the system. Please go with value equal
|
||||
to number of free CPU cores.
|
||||
|
||||
### VictoriaMetrics importer
|
||||
|
||||
The flag `--vm-concurrency` controls the number of concurrent workers that process the input from InfluxDB query results.
|
||||
Please note that each import request can load up to a single vCPU core on VictoriaMetrics. So try to set it according
|
||||
to allocated CPU resources of your VictoriaMetrics installation.
|
||||
|
||||
The flag `--vm-batch-size` controls max amount of samples collected before sending the import request.
|
||||
For example, if `--influx-chunk-size=500` and `--vm-batch-size=2000` then importer will process not more
|
||||
than 4 chunks before sending the request.
|
||||
|
||||
### Importer stats
|
||||
|
||||
After successful import `vmctl` prints some statistics for details.
|
||||
The important numbers to watch are following:
|
||||
- `idle duration` - shows time that importer spent while waiting for data from InfluxDB/Prometheus
|
||||
to fill up `--vm-batch-size` batch size. Value shows total duration across all workers configured
|
||||
via `--vm-concurrency`. High value may be a sign of too slow InfluxDB/Prometheus fetches or too
|
||||
high `--vm-concurrency` value. Try to improve it by increasing `--<mode>-concurrency` value or
|
||||
decreasing `--vm-concurrency` value.
|
||||
- `import requests` - shows how many import requests were issued to VM server.
|
||||
The import request is issued once the batch size(`--vm-batch-size`) is full and ready to be sent.
|
||||
Please prefer big batch sizes (50k-500k) to improve performance.
|
||||
- `import requests retries` - shows number of unsuccessful import requests. Non-zero value may be
|
||||
a sign of network issues or VM being overloaded. See the logs during import for error messages.
|
||||
|
||||
### Silent mode
|
||||
|
||||
By default `vmctl` waits for confirmation from the user before starting the import. If this is unwanted
|
||||
behavior and no user interaction is required, pass the `-s` flag to enable "silent" mode:
|
||||
```
|
||||
-s Whether to run in silent mode. If set to true no confirmation prompts will appear. (default: false)
|
||||
```
|
||||
|
||||
### Significant figures
|
||||
|
||||
`vmctl` allows to limit the number of [significant figures](https://en.wikipedia.org/wiki/Significant_figures)
|
||||
before importing. For example, the average value for response size is `102.342305` bytes and it has 9 significant figures.
|
||||
If you ask a human to pronounce this value then with high probability value will be rounded to first 4 or 5 figures
|
||||
because the rest aren't really that important to mention. In most cases, such a high precision is too much.
|
||||
Moreover, such values may be just a result of [floating point arithmetic](https://en.wikipedia.org/wiki/Floating-point_arithmetic),
|
||||
create a [false precision](https://en.wikipedia.org/wiki/False_precision) and result in a bad compression ratio
|
||||
according to [information theory](https://en.wikipedia.org/wiki/Information_theory).
|
||||
|
||||
`vmctl` provides the following flags for improving data compression:
|
||||
|
||||
* `--vm-round-digits` flag for rounding processed values to the given number of decimal digits after the point.
|
||||
For example, `--vm-round-digits=2` would round `1.2345` to `1.23`. By default the rounding is disabled.
|
||||
|
||||
* `--vm-significant-figures` flag for limiting the number of significant figures in processed values. It takes no effect if set
|
||||
to 0 (by default), but set `--vm-significant-figures=5` and `102.342305` will be rounded to `102.34`.
|
||||
|
||||
The most common case for using these flags is to improve data compression for time series storing aggregation
|
||||
results such as `average`, `rate`, etc.
|
||||
|
||||
### Adding extra labels
|
||||
|
||||
`vmctl` allows adding extra labels to all imported series. It can be achieved with the flag `--vm-extra-label label=value`.
|
||||
If multiple labels need to be added, set the flag for each label, for example, `--vm-extra-label label1=value1 --vm-extra-label label2=value2`.
|
||||
If a timeseries already has a label that is also set via the `--vm-extra-label` flag, the flag has priority and overrides the label value from the timeseries.
|
||||
|
||||
6
app/vmctl/deployment/Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
# Base image to build on; supplied at build time via the base_image build arg.
ARG base_image
|
||||
FROM $base_image
|
||||
|
||||
# The container runs the vmctl binary located at /vmctl-prod.
ENTRYPOINT ["/vmctl-prod"]
|
||||
# Path (in the build context) of the binary to ship; supplied via the src_binary build arg.
ARG src_binary
|
||||
# NOTE(review): the destination is relative to the image's working directory, while
# ENTRYPOINT uses the absolute /vmctl-prod - presumably the base image's WORKDIR is "/"; confirm.
COPY $src_binary ./vmctl-prod
|
||||
292
app/vmctl/flags.go
Normal file
@@ -0,0 +1,292 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
// globalSilent is the name of the global flag that disables confirmation prompts.
const globalSilent = "s"
|
||||
|
||||
var (
|
||||
globalFlags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: globalSilent,
|
||||
Value: false,
|
||||
Usage: "Whether to run in silent mode. If set to true no confirmation prompts will appear.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Names of the flags configuring the VictoriaMetrics importer (see vmFlags).
const (
	// connection and auth
	vmAddr      = "vm-addr"
	vmUser      = "vm-user"
	vmPassword  = "vm-password"
	vmAccountID = "vm-account-id"

	// import request tuning
	vmConcurrency = "vm-concurrency"
	vmCompress    = "vm-compress"
	vmBatchSize   = "vm-batch-size"

	// value and label post-processing
	vmSignificantFigures = "vm-significant-figures"
	vmRoundDigits        = "vm-round-digits"
	vmExtraLabel         = "vm-extra-label"
)
|
||||
|
||||
var (
|
||||
vmFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: vmAddr,
|
||||
Value: "http://localhost:8428",
|
||||
Usage: "VictoriaMetrics address to perform import requests. \n" +
|
||||
"Should be the same as --httpListenAddr value for single-node version or VMInsert component. \n" +
|
||||
"Please note, that `vmctl` performs initial readiness check for the given address by checking `/health` endpoint.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmAccountID,
|
||||
Usage: "AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). \n" +
|
||||
"It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer. \n" +
|
||||
"If projectID isn't set, then it equals to 0",
|
||||
},
|
||||
&cli.UintFlag{
|
||||
Name: vmConcurrency,
|
||||
Usage: "Number of workers concurrently performing import requests to VM",
|
||||
Value: 2,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmCompress,
|
||||
Value: true,
|
||||
Usage: "Whether to apply gzip compression to import requests",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmBatchSize,
|
||||
Value: 200e3,
|
||||
Usage: "How many samples importer collects before sending the import request to VM",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmSignificantFigures,
|
||||
Value: 0,
|
||||
Usage: "The number of significant figures to leave in metric values before importing. " +
|
||||
"See https://en.wikipedia.org/wiki/Significant_figures. Zero value saves all the significant figures. " +
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics. " +
|
||||
"See also --vm-round-digits option",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmRoundDigits,
|
||||
Value: 100,
|
||||
Usage: "Round metric values to the given number of decimal digits after the point. " +
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Names of the flags configuring the InfluxDB source (see influxFlags).
const (
	// connection and auth
	influxAddr     = "influx-addr"
	influxUser     = "influx-user"
	influxPassword = "influx-password"

	// what to read and how
	influxDB          = "influx-database"
	influxRetention   = "influx-retention-policy"
	influxChunkSize   = "influx-chunk-size"
	influxConcurrency = "influx-concurrency"

	// series and time filtering
	influxFilterSeries    = "influx-filter-series"
	influxFilterTimeStart = "influx-filter-time-start"
	influxFilterTimeEnd   = "influx-filter-time-end"

	// naming of the resulting series
	influxMeasurementFieldSeparator = "influx-measurement-field-separator"
)
|
||||
|
||||
var (
|
||||
influxFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: influxAddr,
|
||||
Value: "http://localhost:8086",
|
||||
Usage: "Influx server addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxUser,
|
||||
Usage: "Influx user",
|
||||
EnvVars: []string{"INFLUX_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxPassword,
|
||||
Usage: "Influx user password",
|
||||
EnvVars: []string{"INFLUX_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxDB,
|
||||
Usage: "Influx database",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxRetention,
|
||||
Usage: "Influx retention policy",
|
||||
Value: "autogen",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: influxChunkSize,
|
||||
Usage: "The chunkSize defines max amount of series to be returned in one chunk",
|
||||
Value: 10e3,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: influxConcurrency,
|
||||
Usage: "Number of concurrently running fetch queries to InfluxDB",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterSeries,
|
||||
Usage: "Influx filter expression to select series. E.g. \"from cpu where arch='x86' AND hostname='host_2753'\".\n" +
|
||||
"See for details https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration#show-series",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterTimeStart,
|
||||
Usage: "The time filter to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterTimeEnd,
|
||||
Usage: "The time filter to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxMeasurementFieldSeparator,
|
||||
Usage: "The {separator} symbol used to concatenate {measurement} and {field} names into series name {measurement}{separator}{field}.",
|
||||
Value: "_",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Names of the flags configuring the Prometheus snapshot source (see promFlags).
const (
	// snapshot location and read parallelism
	promSnapshot    = "prom-snapshot"
	promConcurrency = "prom-concurrency"

	// time range filtering
	promFilterTimeStart = "prom-filter-time-start"
	promFilterTimeEnd   = "prom-filter-time-end"

	// label filtering
	promFilterLabel      = "prom-filter-label"
	promFilterLabelValue = "prom-filter-label-value"
)
|
||||
|
||||
var (
|
||||
promFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: promSnapshot,
|
||||
Usage: "Path to Prometheus snapshot. Pls see for details https://www.robustperception.io/taking-snapshots-of-prometheus-data",
|
||||
Required: true,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: promConcurrency,
|
||||
Usage: "Number of concurrently running snapshot readers",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterTimeStart,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterTimeEnd,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", promFilterLabel),
|
||||
Value: ".*",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Names of the flags configuring the native VictoriaMetrics-to-VictoriaMetrics
// migration mode (see vmNativeFlags).
const (
	// export filters
	vmNativeFilterMatch     = "vm-native-filter-match"
	vmNativeFilterTimeStart = "vm-native-filter-time-start"
	vmNativeFilterTimeEnd   = "vm-native-filter-time-end"

	// source instance
	vmNativeSrcAddr     = "vm-native-src-addr"
	vmNativeSrcUser     = "vm-native-src-user"
	vmNativeSrcPassword = "vm-native-src-password"

	// destination instance
	vmNativeDstAddr     = "vm-native-dst-addr"
	vmNativeDstUser     = "vm-native-dst-user"
	vmNativeDstPassword = "vm-native-dst-password"
)
|
||||
|
||||
var (
|
||||
vmNativeFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterMatch,
|
||||
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
|
||||
"match all series with \"instance\" label different to \"localhost\".\n" +
|
||||
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
|
||||
Value: `{__name__!=""}`,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterTimeStart,
|
||||
Usage: "The time filter may contain either unix timestamp in seconds or RFC3339 values. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterTimeEnd,
|
||||
Usage: "The time filter may contain either unix timestamp in seconds or RFC3339 values. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcAddr,
|
||||
Usage: "VictoriaMetrics address to perform export from. \n" +
|
||||
" Should be the same as --httpListenAddr value for single-node version or VMSelect component." +
|
||||
" If exporting from cluster version - include the tenet token in address.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_SRC_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_SRC_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstAddr,
|
||||
Usage: "VictoriaMetrics address to perform import to. \n" +
|
||||
" Should be the same as --httpListenAddr value for single-node version or VMInsert component." +
|
||||
" If importing into cluster version - include the tenet token in address.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_DST_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_DST_PASSWORD"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func mergeFlags(flags ...[]cli.Flag) []cli.Flag {
|
||||
var result []cli.Flag
|
||||
for _, f := range flags {
|
||||
result = append(result, f...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
146
app/vmctl/influx.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
)
|
||||
|
||||
type influxProcessor struct {
|
||||
ic *influx.Client
|
||||
im *vm.Importer
|
||||
cc int
|
||||
separator string
|
||||
}
|
||||
|
||||
func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator string) *influxProcessor {
|
||||
if cc < 1 {
|
||||
cc = 1
|
||||
}
|
||||
return &influxProcessor{
|
||||
ic: ic,
|
||||
im: im,
|
||||
cc: cc,
|
||||
separator: separator,
|
||||
}
|
||||
}
|
||||
|
||||
func (ip *influxProcessor) run(silent bool) error {
|
||||
series, err := ip.ic.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore query failed: %s", err)
|
||||
}
|
||||
if len(series) < 1 {
|
||||
return fmt.Errorf("found no timeseries to import")
|
||||
}
|
||||
|
||||
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
|
||||
if !silent && !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
bar := pb.StartNew(len(series))
|
||||
seriesCh := make(chan *influx.Series)
|
||||
errCh := make(chan error)
|
||||
ip.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(ip.cc)
|
||||
for i := 0; i < ip.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for s := range seriesCh {
|
||||
if err := ip.do(s); err != nil {
|
||||
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
|
||||
return
|
||||
}
|
||||
bar.Increment()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for _, s := range series {
|
||||
select {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("influx error: %s", infErr)
|
||||
case vmErr := <-ip.im.Errors():
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
case seriesCh <- s:
|
||||
}
|
||||
}
|
||||
|
||||
close(seriesCh)
|
||||
wg.Wait()
|
||||
ip.im.Close()
|
||||
// drain import errors channel
|
||||
for vmErr := range ip.im.Errors() {
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
}
|
||||
bar.Finish()
|
||||
log.Println("Import finished!")
|
||||
log.Print(ip.im.Stats())
|
||||
return nil
|
||||
}
|
||||
|
||||
// dbLabel is the name of the label holding the source InfluxDB database name.
const dbLabel = "db"
|
||||
|
||||
func (ip *influxProcessor) do(s *influx.Series) error {
|
||||
cr, err := ip.ic.FetchDataPoints(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch datapoints: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = cr.Close()
|
||||
}()
|
||||
var name string
|
||||
if s.Measurement != "" {
|
||||
name = fmt.Sprintf("%s%s%s", s.Measurement, ip.separator, s.Field)
|
||||
} else {
|
||||
name = s.Field
|
||||
}
|
||||
|
||||
labels := make([]vm.LabelPair, len(s.LabelPairs))
|
||||
var containsDBLabel bool
|
||||
for i, lp := range s.LabelPairs {
|
||||
if lp.Name == dbLabel {
|
||||
containsDBLabel = true
|
||||
break
|
||||
}
|
||||
labels[i] = vm.LabelPair{
|
||||
Name: lp.Name,
|
||||
Value: lp.Value,
|
||||
}
|
||||
}
|
||||
if !containsDBLabel {
|
||||
labels = append(labels, vm.LabelPair{
|
||||
Name: dbLabel,
|
||||
Value: ip.ic.Database(),
|
||||
})
|
||||
}
|
||||
|
||||
for {
|
||||
time, values, err := cr.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
// skip empty results
|
||||
if len(time) < 1 {
|
||||
continue
|
||||
}
|
||||
ip.im.Input() <- &vm.TimeSeries{
|
||||
Name: name,
|
||||
LabelPairs: labels,
|
||||
Timestamps: time,
|
||||
Values: values,
|
||||
}
|
||||
}
|
||||
}
|
||||
362
app/vmctl/influx/influx.go
Normal file
@@ -0,0 +1,362 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
influx "github.com/influxdata/influxdb/client/v2"
|
||||
)
|
||||
|
||||
// Client represents a wrapper over
|
||||
// influx HTTP client
|
||||
type Client struct {
|
||||
influx.Client
|
||||
|
||||
database string
|
||||
retention string
|
||||
chunkSize int
|
||||
|
||||
filterSeries string
|
||||
filterTime string
|
||||
}
|
||||
|
||||
// Config contains fields required
|
||||
// for Client configuration
|
||||
type Config struct {
|
||||
Addr string
|
||||
Username string
|
||||
Password string
|
||||
Database string
|
||||
Retention string
|
||||
ChunkSize int
|
||||
|
||||
Filter Filter
|
||||
}
|
||||
|
||||
// Filter describes which time series and which time range to read.
type Filter struct {
	// Series is a filter expression appended to the "show series" command.
	Series string
	// TimeStart and TimeEnd are the inclusive bounds of the time condition;
	// an empty value leaves that side unbounded.
	TimeStart string
	TimeEnd   string
}
|
||||
|
||||
// Series holds the time series
|
||||
type Series struct {
|
||||
Measurement string
|
||||
Field string
|
||||
LabelPairs []LabelPair
|
||||
}
|
||||
|
||||
// valueEscaper backslash-escapes backslashes and single quotes in a string.
var valueEscaper = strings.NewReplacer(`\`, `\\`, `'`, `\'`)
|
||||
|
||||
func (s Series) fetchQuery(timeFilter string) string {
|
||||
f := &strings.Builder{}
|
||||
fmt.Fprintf(f, "select %q from %q", s.Field, s.Measurement)
|
||||
if len(s.LabelPairs) > 0 || len(timeFilter) > 0 {
|
||||
f.WriteString(" where")
|
||||
}
|
||||
for i, pair := range s.LabelPairs {
|
||||
pairV := valueEscaper.Replace(pair.Value)
|
||||
fmt.Fprintf(f, " %q='%s'", pair.Name, pairV)
|
||||
if i != len(s.LabelPairs)-1 {
|
||||
f.WriteString(" and")
|
||||
}
|
||||
}
|
||||
if len(timeFilter) > 0 {
|
||||
if len(s.LabelPairs) > 0 {
|
||||
f.WriteString(" and")
|
||||
}
|
||||
fmt.Fprintf(f, " %s", timeFilter)
|
||||
}
|
||||
return f.String()
|
||||
}
|
||||
|
||||
// LabelPair is a single label of a time series:
// the label name and its value.
type LabelPair struct {
	Name  string
	Value string
}
|
||||
|
||||
// NewClient creates and returns influx client
|
||||
// configured with passed Config
|
||||
func NewClient(cfg Config) (*Client, error) {
|
||||
c := influx.HTTPConfig{
|
||||
Addr: cfg.Addr,
|
||||
Username: cfg.Username,
|
||||
Password: cfg.Password,
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
hc, err := influx.NewHTTPClient(c)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to establish conn: %s", err)
|
||||
}
|
||||
if _, _, err := hc.Ping(time.Second); err != nil {
|
||||
return nil, fmt.Errorf("ping failed: %s", err)
|
||||
}
|
||||
|
||||
chunkSize := cfg.ChunkSize
|
||||
if chunkSize < 1 {
|
||||
chunkSize = 10e3
|
||||
}
|
||||
|
||||
client := &Client{
|
||||
Client: hc,
|
||||
database: cfg.Database,
|
||||
retention: cfg.Retention,
|
||||
chunkSize: chunkSize,
|
||||
filterTime: timeFilter(cfg.Filter.TimeStart, cfg.Filter.TimeEnd),
|
||||
filterSeries: cfg.Filter.Series,
|
||||
}
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// Database returns the name of the database the client was configured with.
|
||||
func (c Client) Database() string {
|
||||
return c.database
|
||||
}
|
||||
|
||||
// timeFilter renders the time condition for the given bounds.
// Each non-empty bound produces an inclusive comparison, two bounds are joined
// with "and", and an empty string is returned when both bounds are empty.
func timeFilter(start, end string) string {
	switch {
	case start != "" && end != "":
		return fmt.Sprintf("time >= '%s' and time <= '%s'", start, end)
	case start != "":
		return fmt.Sprintf("time >= '%s'", start)
	case end != "":
		return fmt.Sprintf("time <= '%s'", end)
	default:
		return ""
	}
}
|
||||
|
||||
// Explore checks the existing data schema in influx
|
||||
// by checking available fields and series,
|
||||
// which unique combination represents all possible
|
||||
// time series existing in database.
|
||||
// The explore required to reduce the load on influx
|
||||
// by querying field of the exact time series at once,
|
||||
// instead of fetching all of the values over and over.
|
||||
//
|
||||
// May contain non-existing time series.
|
||||
func (c *Client) Explore() ([]*Series, error) {
|
||||
log.Printf("Exploring scheme for database %q", c.database)
|
||||
mFields, err := c.fieldsByMeasurement()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get field keys: %s", err)
|
||||
}
|
||||
|
||||
series, err := c.getSeries()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get series: %s", err)
|
||||
}
|
||||
|
||||
var iSeries []*Series
|
||||
for _, s := range series {
|
||||
fields, ok := mFields[s.Measurement]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find field keys for measurement %q", s.Measurement)
|
||||
}
|
||||
for _, field := range fields {
|
||||
is := &Series{
|
||||
Measurement: s.Measurement,
|
||||
Field: field,
|
||||
LabelPairs: s.LabelPairs,
|
||||
}
|
||||
iSeries = append(iSeries, is)
|
||||
}
|
||||
}
|
||||
return iSeries, nil
|
||||
}
|
||||
|
||||
// ChunkedResponse is a wrapper over influx.ChunkedResponse.
|
||||
// Used for better memory usage control while iterating
|
||||
// over huge time series.
|
||||
type ChunkedResponse struct {
|
||||
cr *influx.ChunkedResponse
|
||||
iq influx.Query
|
||||
field string
|
||||
}
|
||||
|
||||
// Close closes the wrapped chunked response and returns its error, if any.
|
||||
func (cr *ChunkedResponse) Close() error {
|
||||
return cr.cr.Close()
|
||||
}
|
||||
|
||||
// Next reads the next part/chunk of time series.
|
||||
// Returns io.EOF when time series was read entirely.
|
||||
func (cr *ChunkedResponse) Next() ([]int64, []float64, error) {
|
||||
resp, err := cr.cr.NextResponse()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if resp.Error() != nil {
|
||||
return nil, nil, fmt.Errorf("response error for %s: %s", cr.iq.Command, resp.Error())
|
||||
}
|
||||
if len(resp.Results) != 1 {
|
||||
return nil, nil, fmt.Errorf("unexpected number of results in response: %d", len(resp.Results))
|
||||
}
|
||||
results, err := parseResult(resp.Results[0])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if len(results) < 1 {
|
||||
return nil, nil, nil
|
||||
}
|
||||
r := results[0]
|
||||
|
||||
const key = "time"
|
||||
timestamps, ok := r.values[key]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain field %q", key)
|
||||
}
|
||||
|
||||
fieldValues, ok := r.values[cr.field]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain filed %q", cr.field)
|
||||
}
|
||||
values := make([]float64, len(fieldValues))
|
||||
for i, fv := range fieldValues {
|
||||
v, err := toFloat64(fv)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to convert value %q.%v to float64: %s",
|
||||
cr.field, v, err)
|
||||
}
|
||||
values[i] = v
|
||||
}
|
||||
|
||||
ts := make([]int64, len(results[0].values[key]))
|
||||
for i, v := range timestamps {
|
||||
t, err := parseDate(v.(string))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
ts[i] = t
|
||||
}
|
||||
return ts, values, nil
|
||||
}
|
||||
|
||||
// FetchDataPoints performs SELECT request to fetch
|
||||
// datapoints for particular field.
|
||||
func (c *Client) FetchDataPoints(s *Series) (*ChunkedResponse, error) {
|
||||
iq := influx.Query{
|
||||
Command: s.fetchQuery(c.filterTime),
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
Chunked: true,
|
||||
ChunkSize: 1e4,
|
||||
}
|
||||
cr, err := c.QueryAsChunk(iq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query %q err: %s", iq.Command, err)
|
||||
}
|
||||
return &ChunkedResponse{cr, iq, s.Field}, nil
|
||||
}
|
||||
|
||||
func (c *Client) fieldsByMeasurement() (map[string][]string, error) {
|
||||
q := influx.Query{
|
||||
Command: "show field keys",
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
}
|
||||
log.Printf("fetching fields: %s", stringify(q))
|
||||
qValues, err := c.do(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while executing query %q: %s", q.Command, err)
|
||||
}
|
||||
|
||||
var total int
|
||||
var skipped int
|
||||
const fKey = "fieldKey"
|
||||
const fType = "fieldType"
|
||||
result := make(map[string][]string, len(qValues))
|
||||
for _, qv := range qValues {
|
||||
types := qv.values[fType]
|
||||
fields := qv.values[fKey]
|
||||
values := make([]string, 0)
|
||||
for key, field := range fields {
|
||||
if types[key].(string) == "string" {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
values = append(values, field.(string))
|
||||
total++
|
||||
}
|
||||
result[qv.name] = values
|
||||
}
|
||||
|
||||
if skipped > 0 {
|
||||
log.Printf("found %d fields; skipped %d non-numeric fields", total, skipped)
|
||||
} else {
|
||||
log.Printf("found %d fields", total)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *Client) getSeries() ([]*Series, error) {
|
||||
com := "show series"
|
||||
if c.filterSeries != "" {
|
||||
com = fmt.Sprintf("%s %s", com, c.filterSeries)
|
||||
}
|
||||
q := influx.Query{
|
||||
Command: com,
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
Chunked: true,
|
||||
ChunkSize: c.chunkSize,
|
||||
}
|
||||
|
||||
log.Printf("fetching series: %s", stringify(q))
|
||||
cr, err := c.QueryAsChunk(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while executing query %q: %s", q.Command, err)
|
||||
}
|
||||
|
||||
const key = "key"
|
||||
var result []*Series
|
||||
for {
|
||||
resp, err := cr.NextResponse()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if resp.Error() != nil {
|
||||
return nil, fmt.Errorf("response error for query %q: %s", q.Command, resp.Error())
|
||||
}
|
||||
qValues, err := parseResult(resp.Results[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, qv := range qValues {
|
||||
for _, v := range qv.values[key] {
|
||||
s := &Series{}
|
||||
if err := s.unmarshal(v.(string)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = append(result, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Printf("found %d series", len(result))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *Client) do(q influx.Query) ([]queryValues, error) {
|
||||
res, err := c.Query(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query %q err: %s", q.Command, err)
|
||||
}
|
||||
if len(res.Results) < 1 {
|
||||
return nil, fmt.Errorf("exploration query %q returned 0 results", q.Command)
|
||||
}
|
||||
return parseResult(res.Results[0])
|
||||
}
|
||||
127
app/vmctl/influx/influx_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package influx
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestFetchQuery(t *testing.T) {
|
||||
testCases := []struct {
|
||||
s Series
|
||||
timeFilter string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `select "value" from "cpu" where "foo"='bar'`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
{
|
||||
Name: "baz",
|
||||
Value: "qux",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `select "value" from "cpu" where "foo"='bar' and "baz"='qux'`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "b'ar",
|
||||
},
|
||||
},
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where "foo"='b\'ar' and time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "name",
|
||||
Value: `dev-mapper-centos\x2dswap.swap`,
|
||||
},
|
||||
{
|
||||
Name: "state",
|
||||
Value: "dev-mapp'er-c'en'tos",
|
||||
},
|
||||
},
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where "name"='dev-mapper-centos\\x2dswap.swap' and "state"='dev-mapp\'er-c\'en\'tos' and time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
},
|
||||
expected: `select "value" from "cpu"`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
query := tc.s.fetchQuery(tc.timeFilter)
|
||||
if query != tc.expected {
|
||||
t.Fatalf("got: \n%s;\nexpected: \n%s", query, tc.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTimeFilter(t *testing.T) {
|
||||
testCases := []struct {
|
||||
start string
|
||||
end string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
start: "2020-01-01T20:07:00Z",
|
||||
end: "2020-01-01T21:07:00Z",
|
||||
expected: "time >= '2020-01-01T20:07:00Z' and time <= '2020-01-01T21:07:00Z'",
|
||||
},
|
||||
{
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
start: "2020-01-01T20:07:00Z",
|
||||
expected: "time >= '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
{
|
||||
end: "2020-01-01T21:07:00Z",
|
||||
expected: "time <= '2020-01-01T21:07:00Z'",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
f := timeFilter(tc.start, tc.end)
|
||||
if f != tc.expected {
|
||||
t.Fatalf("got: \n%q;\nexpected: \n%q", f, tc.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
191
app/vmctl/influx/parser.go
Normal file
@@ -0,0 +1,191 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
influx "github.com/influxdata/influxdb/client/v2"
|
||||
)
|
||||
|
||||
// queryValues holds the values of one series of a query result
// in columnar form, as built by parseResult.
type queryValues struct {
	// name is the name of the series row the values were taken from.
	name string
	// values maps a column name to the values of that column,
	// appended in row order.
	values map[string][]interface{}
}
|
||||
|
||||
func parseResult(r influx.Result) ([]queryValues, error) {
|
||||
if len(r.Err) > 0 {
|
||||
return nil, fmt.Errorf("result error: %s", r.Err)
|
||||
}
|
||||
qValues := make([]queryValues, len(r.Series))
|
||||
for i, row := range r.Series {
|
||||
values := make(map[string][]interface{}, len(row.Values))
|
||||
for _, value := range row.Values {
|
||||
for idx, v := range value {
|
||||
key := row.Columns[idx]
|
||||
values[key] = append(values[key], v)
|
||||
}
|
||||
}
|
||||
qValues[i] = queryValues{
|
||||
name: row.Name,
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
return qValues, nil
|
||||
}
|
||||
|
||||
// toFloat64 converts v to float64.
// Supported inputs are json.Number, the built-in float and integer types
// listed below, and strings holding a float literal. Any other type
// (including nil) yields an error naming both the value and its type.
func toFloat64(v interface{}) (float64, error) {
	switch i := v.(type) {
	case json.Number:
		return i.Float64()
	case float64:
		return i, nil
	case float32:
		return float64(i), nil
	case int64:
		return float64(i), nil
	case int32:
		return float64(i), nil
	case int:
		return float64(i), nil
	case uint64:
		return float64(i), nil
	case uint32:
		return float64(i), nil
	case uint:
		return float64(i), nil
	case string:
		return strconv.ParseFloat(i, 64)
	default:
		// %T reports the dynamic type; the previous %v printed only the value
		// although the message talks about the type.
		return 0, fmt.Errorf("unexpected value %v of type %T", i, i)
	}
}
|
||||
|
||||
// parseDate parses an RFC3339 timestamp and returns it
// as Unix time in milliseconds.
func parseDate(dateStr string) (int64, error) {
	parsed, err := time.Parse(time.RFC3339, dateStr)
	if err == nil {
		const nanosPerMilli = int64(time.Millisecond)
		return parsed.UnixNano() / nanosPerMilli, nil
	}
	return 0, fmt.Errorf("cannot parse %q: %s", dateStr, err)
}
|
||||
|
||||
func stringify(q influx.Query) string {
|
||||
return fmt.Sprintf("command: %q; database: %q; retention: %q",
|
||||
q.Command, q.Database, q.RetentionPolicy)
|
||||
}
|
||||
|
||||
func (s *Series) unmarshal(v string) error {
|
||||
noEscapeChars := strings.IndexByte(v, '\\') < 0
|
||||
n := nextUnescapedChar(v, ',', noEscapeChars)
|
||||
if n < 0 {
|
||||
s.Measurement = unescapeTagValue(v, noEscapeChars)
|
||||
return nil
|
||||
}
|
||||
s.Measurement = unescapeTagValue(v[:n], noEscapeChars)
|
||||
var err error
|
||||
s.LabelPairs, err = unmarshalTags(v[n+1:], noEscapeChars)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to unmarhsal tags: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalTags(s string, noEscapeChars bool) ([]LabelPair, error) {
|
||||
var result []LabelPair
|
||||
for {
|
||||
lp := LabelPair{}
|
||||
n := nextUnescapedChar(s, ',', noEscapeChars)
|
||||
if n < 0 {
|
||||
if err := lp.unmarshal(s, noEscapeChars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(lp.Name) == 0 || len(lp.Value) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
result = append(result, lp)
|
||||
return result, nil
|
||||
}
|
||||
if err := lp.unmarshal(s[:n], noEscapeChars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s = s[n+1:]
|
||||
if len(lp.Name) == 0 || len(lp.Value) == 0 {
|
||||
continue
|
||||
}
|
||||
result = append(result, lp)
|
||||
}
|
||||
}
|
||||
|
||||
func (lp *LabelPair) unmarshal(s string, noEscapeChars bool) error {
|
||||
n := nextUnescapedChar(s, '=', noEscapeChars)
|
||||
if n < 0 {
|
||||
return fmt.Errorf("missing tag value for %q", s)
|
||||
}
|
||||
lp.Name = unescapeTagValue(s[:n], noEscapeChars)
|
||||
lp.Value = unescapeTagValue(s[n+1:], noEscapeChars)
|
||||
return nil
|
||||
}
|
||||
|
||||
// unescapeTagValue removes the backslash in front of ' ', ',', '=' and '\\'.
// A backslash before any other character, or at the very end of s, is kept.
// When noEscapeChars is set, s is returned untouched.
func unescapeTagValue(s string, noEscapeChars bool) string {
	if noEscapeChars || strings.IndexByte(s, '\\') < 0 {
		// Fast path - nothing to unescape.
		return s
	}

	// Slow path - copy s while dropping the escaping backslashes.
	var out strings.Builder
	out.Grow(len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' {
			out.WriteByte(c)
			continue
		}
		if i+1 == len(s) {
			// Trailing backslash has nothing to escape; keep it.
			out.WriteByte('\\')
			break
		}
		escaped := s[i+1]
		switch escaped {
		case ' ', ',', '=', '\\':
			// The backslash is an escape; drop it.
		default:
			out.WriteByte('\\')
		}
		out.WriteByte(escaped)
		i++ // the escaped char has been consumed
	}
	return out.String()
}
|
||||
|
||||
// nextUnescapedChar returns the index of the first occurrence of ch in s
// that is not escaped, or -1 if there is none. An occurrence counts as
// escaped when it is directly preceded by an odd number of backslashes;
// the backslashes are counted no further back than the previous
// (escaped) occurrence of ch.
func nextUnescapedChar(s string, ch byte, noEscapeChars bool) int {
	if noEscapeChars {
		// Fast path: s has no escape chars, so any occurrence qualifies.
		return strings.IndexByte(s, ch)
	}

	from := 0
	for {
		rel := strings.IndexByte(s[from:], ch)
		if rel < 0 {
			return -1
		}
		pos := from + rel
		backslashes := 0
		for j := pos; j > from && s[j-1] == '\\'; j-- {
			backslashes++
		}
		if backslashes%2 == 0 {
			return pos
		}
		// Escaped occurrence: continue searching right after it.
		from = pos + 1
	}
}
|
||||
60
app/vmctl/influx/parser_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSeries_Unmarshal(t *testing.T) {
|
||||
tag := func(name, value string) LabelPair {
|
||||
return LabelPair{
|
||||
Name: name,
|
||||
Value: value,
|
||||
}
|
||||
}
|
||||
series := func(measurement string, lp ...LabelPair) Series {
|
||||
return Series{
|
||||
Measurement: measurement,
|
||||
LabelPairs: lp,
|
||||
}
|
||||
}
|
||||
testCases := []struct {
|
||||
got string
|
||||
want Series
|
||||
}{
|
||||
{
|
||||
got: "cpu",
|
||||
want: series("cpu"),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=localhost",
|
||||
want: series("cpu", tag("host", "localhost")),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=localhost,instance=instance",
|
||||
want: series("cpu", tag("host", "localhost"), tag("instance", "instance")),
|
||||
},
|
||||
{
|
||||
got: `fo\,bar\=baz,x\=\b=\\a\,\=\q\ `,
|
||||
want: series("fo,bar=baz", tag(`x=\b`, `\a,=\q `)),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=192.168.0.1,instance=fe80::fdc8:5e36:c2c6:baac%utun1",
|
||||
want: series("cpu", tag("host", "192.168.0.1"), tag("instance", "fe80::fdc8:5e36:c2c6:baac%utun1")),
|
||||
},
|
||||
{
|
||||
got: `cpu,db=db1,host=localhost,server=host\=localhost\ user\=user\ `,
|
||||
want: series("cpu", tag("db", "db1"),
|
||||
tag("host", "localhost"), tag("server", "host=localhost user=user ")),
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
s := Series{}
|
||||
if err := s.unmarshal(tc.got); err != nil {
|
||||
t.Fatalf("%q: unmarshal err: %s", tc.got, err)
|
||||
}
|
||||
if !reflect.DeepEqual(s, tc.want) {
|
||||
t.Fatalf("%q: expected\n%#v\nto be equal\n%#v", tc.got, s, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
159
app/vmctl/main.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
// main wires the vmctl sub-commands ("influx", "prometheus", "vm-native")
// to their processors, runs the command selected on the command line and
// logs the total execution time.
func main() {
	start := time.Now()
	app := &cli.App{
		Name:    "vmctl",
		Usage:   "Victoria metrics command-line tool",
		Version: buildinfo.Version,
		Commands: []*cli.Command{
			{
				Name:  "influx",
				Usage: "Migrate timeseries from InfluxDB",
				Flags: mergeFlags(globalFlags, influxFlags, vmFlags),
				// Builds the InfluxDB client and the VM importer from flags
				// and runs the influx processor.
				Action: func(c *cli.Context) error {
					fmt.Println("InfluxDB import mode")

					iCfg := influx.Config{
						Addr:      c.String(influxAddr),
						Username:  c.String(influxUser),
						Password:  c.String(influxPassword),
						Database:  c.String(influxDB),
						Retention: c.String(influxRetention),
						Filter: influx.Filter{
							Series:    c.String(influxFilterSeries),
							TimeStart: c.String(influxFilterTimeStart),
							TimeEnd:   c.String(influxFilterTimeEnd),
						},
						ChunkSize: c.Int(influxChunkSize),
					}
					influxClient, err := influx.NewClient(iCfg)
					if err != nil {
						return fmt.Errorf("failed to create influx client: %s", err)
					}

					vmCfg := initConfigVM(c)
					importer, err := vm.NewImporter(vmCfg)
					if err != nil {
						return fmt.Errorf("failed to create VM importer: %s", err)
					}

					processor := newInfluxProcessor(influxClient, importer,
						c.Int(influxConcurrency), c.String(influxMeasurementFieldSeparator))
					return processor.run(c.Bool(globalSilent))
				},
			},
			{
				Name:  "prometheus",
				Usage: "Migrate timeseries from Prometheus",
				Flags: mergeFlags(globalFlags, promFlags, vmFlags),
				// Builds the VM importer and the Prometheus snapshot client
				// from flags and runs the prometheus processor.
				Action: func(c *cli.Context) error {
					fmt.Println("Prometheus import mode")

					vmCfg := initConfigVM(c)
					importer, err := vm.NewImporter(vmCfg)
					if err != nil {
						return fmt.Errorf("failed to create VM importer: %s", err)
					}

					promCfg := prometheus.Config{
						Snapshot: c.String(promSnapshot),
						Filter: prometheus.Filter{
							TimeMin:    c.String(promFilterTimeStart),
							TimeMax:    c.String(promFilterTimeEnd),
							Label:      c.String(promFilterLabel),
							LabelValue: c.String(promFilterLabelValue),
						},
					}
					cl, err := prometheus.NewClient(promCfg)
					if err != nil {
						return fmt.Errorf("failed to create prometheus client: %s", err)
					}
					pp := prometheusProcessor{
						cl: cl,
						im: importer,
						cc: c.Int(promConcurrency),
					}
					return pp.run(c.Bool(globalSilent))
				},
			},
			{
				Name:  "vm-native",
				Usage: "Migrate time series between VictoriaMetrics installations via native binary format",
				Flags: vmNativeFlags,
				// Requires a non-empty match filter, then runs the native
				// processor between the source and destination clients.
				Action: func(c *cli.Context) error {
					fmt.Println("VictoriaMetrics Native import mode")

					if c.String(vmNativeFilterMatch) == "" {
						return fmt.Errorf("flag %q can't be empty", vmNativeFilterMatch)
					}

					p := vmNativeProcessor{
						filter: filter{
							match:     c.String(vmNativeFilterMatch),
							timeStart: c.String(vmNativeFilterTimeStart),
							timeEnd:   c.String(vmNativeFilterTimeEnd),
						},
						src: &vmNativeClient{
							// Trim removes leading and trailing slashes from the address.
							addr:     strings.Trim(c.String(vmNativeSrcAddr), "/"),
							user:     c.String(vmNativeSrcUser),
							password: c.String(vmNativeSrcPassword),
						},
						dst: &vmNativeClient{
							addr:        strings.Trim(c.String(vmNativeDstAddr), "/"),
							user:        c.String(vmNativeDstUser),
							password:    c.String(vmNativeDstPassword),
							extraLabels: c.StringSlice(vmExtraLabel),
						},
					}
					return p.run()
				},
			},
		},
	}

	// On SIGINT/SIGTERM print a cancellation notice and exit with status 0.
	c := make(chan os.Signal, 1)
	signal.Notify(c, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-c
		fmt.Println("\r- Execution cancelled")
		os.Exit(0)
	}()

	err := app.Run(os.Args)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("Total time: %v", time.Since(start))
}
|
||||
|
||||
// initConfigVM builds the VictoriaMetrics importer configuration
// from the vm* command-line flags of c.
func initConfigVM(c *cli.Context) vm.Config {
	return vm.Config{
		Addr:     c.String(vmAddr),
		User:     c.String(vmUser),
		Password: c.String(vmPassword),
		// NOTE(review): the int flag value is converted to uint8 without a
		// range check, so values outside 0-255 wrap around — confirm the
		// flag is validated elsewhere.
		Concurrency:        uint8(c.Int(vmConcurrency)),
		Compress:           c.Bool(vmCompress),
		AccountID:          c.String(vmAccountID),
		BatchSize:          c.Int(vmBatchSize),
		SignificantFigures: c.Int(vmSignificantFigures),
		RoundDigits:        c.Int(vmRoundDigits),
		ExtraLabels:        c.StringSlice(vmExtraLabel),
	}
}
|
||||
11
app/vmctl/multiarch/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
# certs_image and root_image are build arguments naming the base images
# of the two stages below.
ARG certs_image
ARG root_image
# First stage: install the CA certificates bundle.
FROM $certs_image as certs
RUN apk --update --no-cache add ca-certificates

# Final stage: the root image plus the CA bundle copied from the certs stage.
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ENTRYPOINT ["/vmctl-prod"]
# TARGETARCH selects which prebuilt vmctl binary is copied into the image.
ARG TARGETARCH
COPY vmctl-${TARGETARCH}-prod ./vmctl-prod
|
||||
131
app/vmctl/prometheus.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
)
|
||||
|
||||
// prometheusProcessor imports the time series found in Prometheus
// snapshot blocks into VictoriaMetrics via the importer.
type prometheusProcessor struct {
	// prometheus client fetches and reads
	// snapshot blocks
	cl *prometheus.Client
	// importer performs import requests
	// for timeseries data returned from
	// snapshot blocks
	im *vm.Importer
	// cc stands for concurrency
	// and defines number of concurrently
	// running snapshot block readers
	cc int
}
|
||||
|
||||
func (pp *prometheusProcessor) run(silent bool) error {
|
||||
blocks, err := pp.cl.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore failed: %s", err)
|
||||
}
|
||||
if len(blocks) < 1 {
|
||||
return fmt.Errorf("found no blocks to import")
|
||||
}
|
||||
question := fmt.Sprintf("Found %d blocks to import. Continue?", len(blocks))
|
||||
if !silent && !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
bar := pb.StartNew(len(blocks))
|
||||
blockReadersCh := make(chan tsdb.BlockReader)
|
||||
errCh := make(chan error, pp.cc)
|
||||
pp.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(pp.cc)
|
||||
for i := 0; i < pp.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for br := range blockReadersCh {
|
||||
if err := pp.do(br); err != nil {
|
||||
errCh <- fmt.Errorf("read failed for block %q: %s", br.Meta().ULID, err)
|
||||
return
|
||||
}
|
||||
bar.Increment()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for _, br := range blocks {
|
||||
select {
|
||||
case promErr := <-errCh:
|
||||
close(blockReadersCh)
|
||||
return fmt.Errorf("prometheus error: %s", promErr)
|
||||
case vmErr := <-pp.im.Errors():
|
||||
close(blockReadersCh)
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
case blockReadersCh <- br:
|
||||
}
|
||||
}
|
||||
|
||||
close(blockReadersCh)
|
||||
wg.Wait()
|
||||
// wait for all buffers to flush
|
||||
pp.im.Close()
|
||||
// drain import errors channel
|
||||
for vmErr := range pp.im.Errors() {
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
}
|
||||
bar.Finish()
|
||||
log.Println("Import finished!")
|
||||
log.Print(pp.im.Stats())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
|
||||
ss, err := pp.cl.Read(b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read block: %s", err)
|
||||
}
|
||||
for ss.Next() {
|
||||
var name string
|
||||
var labels []vm.LabelPair
|
||||
series := ss.At()
|
||||
|
||||
for _, label := range series.Labels() {
|
||||
if label.Name == "__name__" {
|
||||
name = label.Value
|
||||
continue
|
||||
}
|
||||
labels = append(labels, vm.LabelPair{
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
})
|
||||
}
|
||||
if name == "" {
|
||||
return fmt.Errorf("failed to find `__name__` label in labelset for block %v", b.Meta().ULID)
|
||||
}
|
||||
|
||||
var timestamps []int64
|
||||
var values []float64
|
||||
it := series.Iterator()
|
||||
for it.Next() {
|
||||
t, v := it.At()
|
||||
timestamps = append(timestamps, t)
|
||||
values = append(values, v)
|
||||
}
|
||||
if err := it.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
pp.im.Input() <- &vm.TimeSeries{
|
||||
Name: name,
|
||||
LabelPairs: labels,
|
||||
Timestamps: timestamps,
|
||||
Values: values,
|
||||
}
|
||||
}
|
||||
return ss.Err()
|
||||
}
|
||||
147
app/vmctl/prometheus/prometheus.go
Normal file
@@ -0,0 +1,147 @@
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
)
|
||||
|
||||
// Config contains a list of params needed
// for reading Prometheus snapshots
type Config struct {
	// Path to snapshot directory
	Snapshot string

	// Filter holds the raw time and label filter settings;
	// NewClient parses the time bounds as RFC3339.
	Filter Filter
}
|
||||
|
||||
// Filter contains configuration for filtering
// the timeseries
type Filter struct {
	// TimeMin is the lower time bound in RFC3339 format; empty means unset.
	TimeMin string
	// TimeMax is the upper time bound in RFC3339 format; empty means unset.
	TimeMax string
	// Label is the name of the label that Read matches against LabelValue.
	Label string
	// LabelValue is used by Read as a regular expression for the value of Label.
	LabelValue string
}
|
||||
|
||||
// Client is a wrapper over Prometheus tsdb.DBReader
type Client struct {
	// DBReadOnly is the embedded read-only handle of the opened snapshot.
	*tsdb.DBReadOnly
	// filter is the parsed time and label filter applied by Explore and Read.
	filter filter
}
|
||||
|
||||
// filter is the parsed form of Filter used by Client.
type filter struct {
	// min and max are the time bounds in Unix milliseconds (see parseTime);
	// zero means the bound is not set.
	min, max int64
	// label and labelValue are passed to the label matcher in Read.
	label      string
	labelValue string
}

// inRange reports whether the block time range [min, max] overlaps the
// filter time range. A filter bound that is not set (zero) is replaced by
// the corresponding block bound, so it never excludes a block.
func (f filter) inRange(min, max int64) bool {
	fmin, fmax := f.min, f.max
	// Check the filter bound, not the block bound: an unset lower bound
	// must not reject blocks, including those with negative timestamps.
	if fmin == 0 {
		fmin = min
	}
	if fmax == 0 {
		fmax = max
	}
	return min <= fmax && fmin <= max
}
|
||||
|
||||
// NewClient creates and validates new Client
|
||||
// with given Config
|
||||
func NewClient(cfg Config) (*Client, error) {
|
||||
db, err := tsdb.OpenDBReadOnly(cfg.Snapshot, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open snapshot %q: %s", cfg.Snapshot, err)
|
||||
}
|
||||
c := &Client{DBReadOnly: db}
|
||||
min, max, err := parseTime(cfg.Filter.TimeMin, cfg.Filter.TimeMax)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse time in filter: %s", err)
|
||||
}
|
||||
c.filter = filter{
|
||||
min: min,
|
||||
max: max,
|
||||
label: cfg.Filter.Label,
|
||||
labelValue: cfg.Filter.LabelValue,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Explore fetches all available blocks from a snapshot
|
||||
// and collects the Meta() data from each block.
|
||||
// Explore does initial filtering by time-range
|
||||
// for snapshot blocks but does not take into account
|
||||
// label filters.
|
||||
func (c *Client) Explore() ([]tsdb.BlockReader, error) {
|
||||
blocks, err := c.Blocks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch blocks: %s", err)
|
||||
}
|
||||
s := &Stats{
|
||||
Filtered: c.filter.min != 0 || c.filter.max != 0 || c.filter.label != "",
|
||||
Blocks: len(blocks),
|
||||
}
|
||||
var blocksToImport []tsdb.BlockReader
|
||||
for _, block := range blocks {
|
||||
meta := block.Meta()
|
||||
if !c.filter.inRange(meta.MinTime, meta.MaxTime) {
|
||||
s.SkippedBlocks++
|
||||
continue
|
||||
}
|
||||
if s.MinTime == 0 || meta.MinTime < s.MinTime {
|
||||
s.MinTime = meta.MinTime
|
||||
}
|
||||
if s.MaxTime == 0 || meta.MaxTime > s.MaxTime {
|
||||
s.MaxTime = meta.MaxTime
|
||||
}
|
||||
s.Samples += meta.Stats.NumSamples
|
||||
s.Series += meta.Stats.NumSeries
|
||||
blocksToImport = append(blocksToImport, block)
|
||||
}
|
||||
fmt.Println(s)
|
||||
return blocksToImport, nil
|
||||
}
|
||||
|
||||
// Read reads the given BlockReader according to configured
|
||||
// time and label filters.
|
||||
func (c *Client) Read(block tsdb.BlockReader) (storage.SeriesSet, error) {
|
||||
minTime, maxTime := block.Meta().MinTime, block.Meta().MaxTime
|
||||
if c.filter.min != 0 {
|
||||
minTime = c.filter.min
|
||||
}
|
||||
if c.filter.max != 0 {
|
||||
maxTime = c.filter.max
|
||||
}
|
||||
q, err := tsdb.NewBlockQuerier(block, minTime, maxTime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ss := q.Select(false, nil, labels.MustNewMatcher(labels.MatchRegexp, c.filter.label, c.filter.labelValue))
|
||||
return ss, nil
|
||||
}
|
||||
|
||||
// parseTime converts the RFC3339 strings start and end to Unix
// milliseconds. An empty string yields 0 for the corresponding bound.
// On a parse error both results are 0.
func parseTime(start, end string) (int64, int64, error) {
	toMillis := func(v string) (int64, error) {
		if v == "" {
			return 0, nil
		}
		parsed, err := time.Parse(time.RFC3339, v)
		if err != nil {
			return 0, fmt.Errorf("failed to parse %q: %s", v, err)
		}
		return parsed.UnixNano() / int64(time.Millisecond), nil
	}

	from, err := toMillis(start)
	if err != nil {
		return 0, 0, err
	}
	to, err := toMillis(end)
	if err != nil {
		return 0, 0, err
	}
	return from, to, nil
}
|
||||
34
app/vmctl/prometheus/prometheus_test.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInRange(t *testing.T) {
|
||||
testCases := []struct {
|
||||
filterMin, filterMax int64
|
||||
blockMin, blockMax int64
|
||||
expected bool
|
||||
}{
|
||||
{0, 0, 1, 2, true},
|
||||
{0, 3, 1, 2, true},
|
||||
{0, 3, 4, 5, false},
|
||||
{3, 0, 1, 2, false},
|
||||
{3, 0, 2, 4, true},
|
||||
{3, 10, 1, 2, false},
|
||||
{3, 10, 1, 4, true},
|
||||
{3, 10, 5, 9, true},
|
||||
{3, 10, 9, 12, true},
|
||||
{3, 10, 12, 15, false},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
f := filter{
|
||||
min: tc.filterMin,
|
||||
max: tc.filterMax,
|
||||
}
|
||||
got := f.inRange(tc.blockMin, tc.blockMax)
|
||||
if got != tc.expected {
|
||||
t.Fatalf("got %v; expected %v: %v", got, tc.expected, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
38
app/vmctl/prometheus/stats.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Stats represents data migration stats.
type Stats struct {
	// Filtered is set when any time or label filter is configured;
	// String then adds a note that the numbers ignore the filters.
	Filtered bool
	// MinTime and MaxTime are the smallest and largest block time bounds
	// among the selected blocks; String formats them as Unix milliseconds.
	MinTime int64
	MaxTime int64
	// Samples and Series are sums of the per-block meta counters.
	Samples uint64
	Series  uint64
	// Blocks is the number of blocks found in the snapshot.
	Blocks int
	// SkippedBlocks is the number of blocks rejected by the time filter.
	SkippedBlocks int
}
|
||||
|
||||
// String returns string representation for s.
|
||||
func (s Stats) String() string {
|
||||
str := fmt.Sprintf("Prometheus snapshot stats:\n"+
|
||||
" blocks found: %d;\n"+
|
||||
" blocks skipped by time filter: %d;\n"+
|
||||
" min time: %d (%v);\n"+
|
||||
" max time: %d (%v);\n"+
|
||||
" samples: %d;\n"+
|
||||
" series: %d.",
|
||||
s.Blocks, s.SkippedBlocks,
|
||||
s.MinTime, time.Unix(s.MinTime/1e3, 0).Format(time.RFC3339),
|
||||
s.MaxTime, time.Unix(s.MaxTime/1e3, 0).Format(time.RFC3339),
|
||||
s.Samples, s.Series)
|
||||
|
||||
if s.Filtered {
|
||||
str += "\n* Stats numbers are based on blocks meta info and don't account for applied filters."
|
||||
}
|
||||
|
||||
return str
|
||||
}
|
||||
33
app/vmctl/utils.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
)
|
||||
|
||||
// prompt prints question followed by " [Y/n] " and reads one line from
// stdin. It returns true for an empty answer, "y" or "yes" (case-insensitive,
// surrounding whitespace ignored) and false otherwise.
// It panics if reading from stdin fails.
func prompt(question string) bool {
	fmt.Print(question, " [Y/n] ")
	line, err := bufio.NewReader(os.Stdin).ReadString('\n')
	if err != nil {
		panic(err)
	}
	switch strings.TrimSpace(strings.ToLower(line)) {
	case "", "yes", "y":
		return true
	default:
		return false
	}
}
|
||||
|
||||
func wrapErr(vmErr *vm.ImportError) error {
|
||||
var errTS string
|
||||
for _, ts := range vmErr.Batch {
|
||||
errTS += fmt.Sprintf("%s for timestamps range %d - %d\n",
|
||||
ts.String(), ts.Timestamps[0], ts.Timestamps[len(ts.Timestamps)-1])
|
||||
}
|
||||
return fmt.Errorf("%s with error: %s", errTS, vmErr.Err)
|
||||
}
|
||||
47
app/vmctl/vm/stats.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package vm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// stats accumulates counters of an import run.
// The embedded mutex is held by String while the fields are read.
type stats struct {
	sync.Mutex
	// samples and bytes are the totals String reports and uses
	// to derive the per-second rates.
	samples uint64
	bytes   uint64
	// requests and retries are reported as the number of import requests
	// and import request retries.
	requests uint64
	retries  uint64
	// startTime is the moment from which String measures the import duration.
	startTime time.Time
	// idleDuration is reported by String as "idle duration".
	idleDuration time.Duration
}
|
||||
|
||||
func (s *stats) String() string {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
totalImportDuration := time.Since(s.startTime)
|
||||
totalImportDurationS := totalImportDuration.Seconds()
|
||||
var samplesPerS float64
|
||||
if s.samples > 0 && totalImportDurationS > 0 {
|
||||
samplesPerS = float64(s.samples) / totalImportDurationS
|
||||
}
|
||||
bytesPerS := byteCountSI(0)
|
||||
if s.bytes > 0 && totalImportDurationS > 0 {
|
||||
bytesPerS = byteCountSI(int64(float64(s.bytes) / totalImportDurationS))
|
||||
}
|
||||
|
||||
return fmt.Sprintf("VictoriaMetrics importer stats:\n"+
|
||||
" idle duration: %v;\n"+
|
||||
" time spent while importing: %v;\n"+
|
||||
" total samples: %d;\n"+
|
||||
" samples/s: %.2f;\n"+
|
||||
" total bytes: %s;\n"+
|
||||
" bytes/s: %s;\n"+
|
||||
" import requests: %d;\n"+
|
||||
" import requests retries: %d;",
|
||||
s.idleDuration, totalImportDuration,
|
||||
s.samples, samplesPerS,
|
||||
byteCountSI(int64(s.bytes)), bytesPerS,
|
||||
s.requests, s.retries)
|
||||
}
|
||||
82
app/vmctl/vm/timeseries.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package vm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// TimeSeries represents a time series: a metric name, its labels and the
// samples to import. write emits Timestamps and Values as parallel arrays,
// so Values must hold at least as many items as Timestamps.
type TimeSeries struct {
	Name       string
	LabelPairs []LabelPair
	Timestamps []int64
	Values     []float64
}

// LabelPair represents a single label as a name/value pair.
type LabelPair struct {
	Name  string
	Value string
}

// String returns a user-readable representation of ts in the form
// name{label="value",...}. Only the name is returned when ts has no labels.
func (ts TimeSeries) String() string {
	if len(ts.LabelPairs) == 0 {
		return ts.Name
	}
	var labels string
	for i, lp := range ts.LabelPairs {
		if i > 0 {
			labels += ","
		}
		labels += fmt.Sprintf("%s=%q", lp.Name, lp.Value)
	}
	return fmt.Sprintf("%s{%s}", ts.Name, labels)
}

// cWriter is used to avoid error checking after every Write call.
// It remembers the first error, if any, and turns all subsequent
// printf calls into no-ops. n counts the bytes written so far.
type cWriter struct {
	w   io.Writer
	n   int
	err error
}

// printf formats into the wrapped writer unless a previous call failed.
func (cw *cWriter) printf(format string, args ...interface{}) {
	if cw.err != nil {
		return
	}
	n, err := fmt.Fprintf(cw.w, format, args...)
	cw.n += n
	cw.err = err
}

// write serializes ts into w as a single line terminated by '\n', e.g.:
//
//	{"metric":{"__name__":"cpu_usage_guest","arch":"x64"},"timestamps":[1567296000000,1567296010000],"values":[1,66]}
//
// It returns the number of bytes written and the first write error.
// Nothing is written for a series without timestamps. An error is returned
// when there are fewer values than timestamps, since the series cannot be
// serialized consistently (this used to panic with an index out of range).
func (ts *TimeSeries) write(w io.Writer) (int, error) {
	pointsCount := len(ts.Timestamps)
	if pointsCount == 0 {
		return 0, nil
	}
	if len(ts.Values) < pointsCount {
		return 0, fmt.Errorf("time series %q has %d timestamps but only %d values",
			ts.Name, pointsCount, len(ts.Values))
	}

	cw := &cWriter{w: w}
	cw.printf(`{"metric":{"__name__":%q`, ts.Name)
	for _, lp := range ts.LabelPairs {
		cw.printf(",%q:%q", lp.Name, lp.Value)
	}

	// The last item of each array is printed separately so that it is
	// followed by the closing bracket instead of a comma.
	cw.printf(`},"timestamps":[`)
	for i := 0; i < pointsCount-1; i++ {
		cw.printf(`%d,`, ts.Timestamps[i])
	}
	cw.printf(`%d],"values":[`, ts.Timestamps[pointsCount-1])
	for i := 0; i < pointsCount-1; i++ {
		cw.printf(`%v,`, ts.Values[i])
	}
	cw.printf("%v]}\n", ts.Values[pointsCount-1])
	return cw.n, cw.err
}
|
||||
89
app/vmctl/vm/timeseries_test.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package vm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTimeSeries_Write(t *testing.T) {
|
||||
var testCases = []struct {
|
||||
name string
|
||||
ts *TimeSeries
|
||||
exp string
|
||||
}{
|
||||
{
|
||||
name: "one datapoint",
|
||||
ts: &TimeSeries{
|
||||
Name: "foo",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "key",
|
||||
Value: "val",
|
||||
},
|
||||
},
|
||||
Timestamps: []int64{1577877162200},
|
||||
Values: []float64{1},
|
||||
},
|
||||
exp: `{"metric":{"__name__":"foo","key":"val"},"timestamps":[1577877162200],"values":[1]}`,
|
||||
},
|
||||
{
|
||||
name: "multiple samples",
|
||||
ts: &TimeSeries{
|
||||
Name: "foo",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "key",
|
||||
Value: "val",
|
||||
},
|
||||
},
|
||||
Timestamps: []int64{1577877162200, 15778771622400, 15778771622600},
|
||||
Values: []float64{1, 1.6263, 32.123},
|
||||
},
|
||||
exp: `{"metric":{"__name__":"foo","key":"val"},"timestamps":[1577877162200,15778771622400,15778771622600],"values":[1,1.6263,32.123]}`,
|
||||
},
|
||||
{
|
||||
name: "no samples",
|
||||
ts: &TimeSeries{
|
||||
Name: "foo",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "key",
|
||||
Value: "val",
|
||||
},
|
||||
},
|
||||
},
|
||||
exp: ``,
|
||||
},
|
||||
{
|
||||
name: "inf values",
|
||||
ts: &TimeSeries{
|
||||
Name: "foo",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "key",
|
||||
Value: "val",
|
||||
},
|
||||
},
|
||||
Timestamps: []int64{1577877162200, 1577877162200, 1577877162200},
|
||||
Values: []float64{0, math.Inf(-1), math.Inf(1)},
|
||||
},
|
||||
exp: `{"metric":{"__name__":"foo","key":"val"},"timestamps":[1577877162200,1577877162200,1577877162200],"values":[0,-Inf,+Inf]}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
b := &bytes.Buffer{}
|
||||
_, err := tc.ts.write(b)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
got := strings.TrimSpace(b.String())
|
||||
if got != tc.exp {
|
||||
t.Fatalf("\ngot: %q\nwant: %q", got, tc.exp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
384
app/vmctl/vm/vm.go
Normal file
@@ -0,0 +1,384 @@
|
||||
package vm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
)
|
||||
|
||||
// Config contains the list of params used by NewImporter
// to configure the Importer.
type Config struct {
	// Addr is the VictoriaMetrics address import requests are sent to:
	//   --httpListenAddr value for single node version;
	//   --httpListenAddr value of the cluster component for cluster version.
	// NOTE(review): the original comment names VMSelect here, while NewImporter
	// builds an "/insert/<AccountID>/..." path for cluster mode - confirm
	// which component is meant.
	Addr string
	// Concurrency defines the number of workers
	// performing the import requests concurrently.
	Concurrency uint8
	// Compress tells whether to apply gzip compression to request bodies.
	Compress bool
	// AccountID for cluster version.
	// Empty value assumes it is a single node version.
	AccountID string
	// BatchSize defines how many samples the importer collects
	// before sending the import request.
	// NewImporter falls back to 1e5 when the value is lower than 1.
	BatchSize int
	// User is the user name for basic auth.
	User string
	// Password is the password for basic auth.
	Password string
	// SignificantFigures defines the number of significant figures to leave
	// in metric values before importing.
	// Zero value saves all the significant decimal places.
	SignificantFigures int
	// RoundDigits defines the number of decimal digits after the point
	// that must be left in metric values before importing.
	// Workers round only when the value is lower than 100.
	RoundDigits int
	// ExtraLabels will be added to all imported series.
	// Each item must be in label=value format.
	ExtraLabels []string
}
|
||||
|
||||
// Importer performs insertion of timeseries
|
||||
// via VictoriaMetrics import protocol
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master#how-to-import-time-series-data
|
||||
type Importer struct {
|
||||
addr string
|
||||
importPath string
|
||||
compress bool
|
||||
user string
|
||||
password string
|
||||
|
||||
close chan struct{}
|
||||
input chan *TimeSeries
|
||||
errors chan *ImportError
|
||||
|
||||
wg sync.WaitGroup
|
||||
once sync.Once
|
||||
|
||||
s *stats
|
||||
}
|
||||
|
||||
// ResetStats resets im stats.
|
||||
func (im *Importer) ResetStats() {
|
||||
im.s = &stats{
|
||||
startTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// Stats returns im stats.
|
||||
func (im *Importer) Stats() string {
|
||||
return im.s.String()
|
||||
}
|
||||
|
||||
// AddExtraLabelsToImportPath appends every item of extraLabels to path as an
// `extra_label` query param. The first param is joined with "?" unless path
// already contains one; the following ones are joined with "&".
// Each label must contain "="; otherwise the unmodified path is returned
// together with an error.
func AddExtraLabelsToImportPath(path string, extraLabels []string) (string, error) {
	result := path
	for _, label := range extraLabels {
		if strings.Index(label, "=") < 0 {
			return path, fmt.Errorf("bad format for extra_label flag, it must be `key=value`, got: %q", label)
		}
		joiner := "?"
		if strings.Contains(result, "?") {
			joiner = "&"
		}
		result += fmt.Sprintf("%sextra_label=%s", joiner, label)
	}
	return result, nil
}
|
||||
|
||||
// NewImporter creates new Importer for the given cfg.
|
||||
func NewImporter(cfg Config) (*Importer, error) {
|
||||
if cfg.Concurrency < 1 {
|
||||
return nil, fmt.Errorf("concurrency can't be lower than 1")
|
||||
}
|
||||
|
||||
addr := strings.TrimRight(cfg.Addr, "/")
|
||||
// if single version
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master#how-to-import-time-series-data
|
||||
importPath := addr + "/api/v1/import"
|
||||
if cfg.AccountID != "" {
|
||||
// if cluster version
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster#url-format
|
||||
importPath = fmt.Sprintf("%s/insert/%s/prometheus/api/v1/import", addr, cfg.AccountID)
|
||||
}
|
||||
importPath, err := AddExtraLabelsToImportPath(importPath, cfg.ExtraLabels)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
im := &Importer{
|
||||
addr: addr,
|
||||
importPath: importPath,
|
||||
compress: cfg.Compress,
|
||||
user: cfg.User,
|
||||
password: cfg.Password,
|
||||
close: make(chan struct{}),
|
||||
input: make(chan *TimeSeries, cfg.Concurrency*4),
|
||||
errors: make(chan *ImportError, cfg.Concurrency),
|
||||
}
|
||||
if err := im.Ping(); err != nil {
|
||||
return nil, fmt.Errorf("ping to %q failed: %s", addr, err)
|
||||
}
|
||||
|
||||
if cfg.BatchSize < 1 {
|
||||
cfg.BatchSize = 1e5
|
||||
}
|
||||
|
||||
im.wg.Add(int(cfg.Concurrency))
|
||||
for i := 0; i < int(cfg.Concurrency); i++ {
|
||||
go func() {
|
||||
defer im.wg.Done()
|
||||
im.startWorker(cfg.BatchSize, cfg.SignificantFigures, cfg.RoundDigits)
|
||||
}()
|
||||
}
|
||||
im.ResetStats()
|
||||
return im, nil
|
||||
}
|
||||
|
||||
// ImportError is type of error generated
|
||||
// in case of unsuccessful import request
|
||||
type ImportError struct {
|
||||
// The batch of timeseries that failed
|
||||
Batch []*TimeSeries
|
||||
// The error that appeared during insert
|
||||
Err error
|
||||
}
|
||||
|
||||
// Errors returns a channel for receiving
|
||||
// import errors if any
|
||||
func (im *Importer) Errors() chan *ImportError { return im.errors }
|
||||
|
||||
// Input returns a channel for sending timeseries
|
||||
// that need to be imported
|
||||
func (im *Importer) Input() chan<- *TimeSeries { return im.input }
|
||||
|
||||
// Close sends signal to all goroutines to exit
|
||||
// and waits until they are finished
|
||||
func (im *Importer) Close() {
|
||||
im.once.Do(func() {
|
||||
close(im.close)
|
||||
im.wg.Wait()
|
||||
close(im.errors)
|
||||
})
|
||||
}
|
||||
|
||||
func (im *Importer) startWorker(batchSize, significantFigures, roundDigits int) {
|
||||
var batch []*TimeSeries
|
||||
var dataPoints int
|
||||
var waitForBatch time.Time
|
||||
for {
|
||||
select {
|
||||
case <-im.close:
|
||||
if err := im.Import(batch); err != nil {
|
||||
im.errors <- &ImportError{
|
||||
Batch: batch,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
return
|
||||
case ts := <-im.input:
|
||||
// init waitForBatch when first
|
||||
// value was received
|
||||
if waitForBatch.IsZero() {
|
||||
waitForBatch = time.Now()
|
||||
}
|
||||
|
||||
if significantFigures > 0 {
|
||||
for i, v := range ts.Values {
|
||||
ts.Values[i] = decimal.RoundToSignificantFigures(v, significantFigures)
|
||||
}
|
||||
}
|
||||
if roundDigits < 100 {
|
||||
for i, v := range ts.Values {
|
||||
ts.Values[i] = decimal.RoundToDecimalDigits(v, roundDigits)
|
||||
}
|
||||
}
|
||||
|
||||
batch = append(batch, ts)
|
||||
dataPoints += len(ts.Values)
|
||||
if dataPoints < batchSize {
|
||||
continue
|
||||
}
|
||||
im.s.Lock()
|
||||
im.s.idleDuration += time.Since(waitForBatch)
|
||||
im.s.Unlock()
|
||||
|
||||
if err := im.flush(batch); err != nil {
|
||||
im.errors <- &ImportError{
|
||||
Batch: batch,
|
||||
Err: err,
|
||||
}
|
||||
// make a new batch, since old one was referenced as err
|
||||
batch = make([]*TimeSeries, len(batch))
|
||||
}
|
||||
batch = batch[:0]
|
||||
dataPoints = 0
|
||||
waitForBatch = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Retry policy used by flush.
// TODO: make configurable
const (
	// backoffRetries is the number of import attempts made per batch.
	backoffRetries = 5
	// backoffFactor is the multiplier applied to the delay after each attempt.
	backoffFactor = 1.7
	// backoffMinDuration is the delay after the first failed attempt.
	backoffMinDuration = time.Second
)
|
||||
|
||||
func (im *Importer) flush(b []*TimeSeries) error {
|
||||
var err error
|
||||
for i := 0; i < backoffRetries; i++ {
|
||||
err = im.Import(b)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if errors.Is(err, ErrBadRequest) {
|
||||
return err // fail fast if not recoverable
|
||||
}
|
||||
im.s.Lock()
|
||||
im.s.retries++
|
||||
im.s.Unlock()
|
||||
backoff := float64(backoffMinDuration) * math.Pow(backoffFactor, float64(i))
|
||||
time.Sleep(time.Duration(backoff))
|
||||
}
|
||||
return fmt.Errorf("import failed with %d retries: %s", backoffRetries, err)
|
||||
}
|
||||
|
||||
// Ping sends a ping to im.addr.
|
||||
func (im *Importer) Ping() error {
|
||||
url := fmt.Sprintf("%s/health", im.addr)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create request to %q: %s", im.addr, err)
|
||||
}
|
||||
if im.user != "" {
|
||||
req.SetBasicAuth(im.user, im.password)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("bad status code: %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Import imports tsBatch.
|
||||
func (im *Importer) Import(tsBatch []*TimeSeries) error {
|
||||
if len(tsBatch) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
pr, pw := io.Pipe()
|
||||
req, err := http.NewRequest("POST", im.importPath, pr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create request to %q: %s", im.addr, err)
|
||||
}
|
||||
if im.user != "" {
|
||||
req.SetBasicAuth(im.user, im.password)
|
||||
}
|
||||
if im.compress {
|
||||
req.Header.Set("Content-Encoding", "gzip")
|
||||
}
|
||||
|
||||
errCh := make(chan error)
|
||||
go func() {
|
||||
errCh <- do(req)
|
||||
close(errCh)
|
||||
}()
|
||||
|
||||
w := io.Writer(pw)
|
||||
if im.compress {
|
||||
zw, err := gzip.NewWriterLevel(pw, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unexpected error when creating gzip writer: %s", err)
|
||||
}
|
||||
w = zw
|
||||
}
|
||||
bw := bufio.NewWriterSize(w, 16*1024)
|
||||
|
||||
var totalSamples, totalBytes int
|
||||
for _, ts := range tsBatch {
|
||||
n, err := ts.write(bw)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write err: %w", err)
|
||||
}
|
||||
totalBytes += n
|
||||
totalSamples += len(ts.Values)
|
||||
}
|
||||
if err := bw.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
if im.compress {
|
||||
err := w.(*gzip.Writer).Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := pw.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
requestErr := <-errCh
|
||||
if requestErr != nil {
|
||||
return fmt.Errorf("import request error for %q: %w", im.addr, requestErr)
|
||||
}
|
||||
|
||||
im.s.Lock()
|
||||
im.s.bytes += uint64(totalBytes)
|
||||
im.s.samples += uint64(totalSamples)
|
||||
im.s.requests++
|
||||
im.s.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ErrBadRequest is the error wrapped by do when the server
// answers with 400 Bad Request.
var ErrBadRequest = errors.New("bad request")

// do performs req with the default HTTP client and expects 204 No Content.
// Any other status code is turned into an error that includes the response
// body; a 400 response additionally wraps ErrBadRequest.
func do(req *http.Request) error {
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("unexpected error when performing request: %s", err)
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	code := resp.StatusCode
	if code == http.StatusNoContent {
		return nil
	}

	payload, readErr := ioutil.ReadAll(resp.Body)
	switch {
	case readErr != nil:
		return fmt.Errorf("failed to read response body for status code %d: %s", code, readErr)
	case code == http.StatusBadRequest:
		return fmt.Errorf("%w: unexpected response code %d: %s", ErrBadRequest, code, string(payload))
	default:
		return fmt.Errorf("unexpected response code %d: %s", code, string(payload))
	}
}
|
||||
|
||||
// byteCountSI formats b as a human-readable size using decimal (SI)
// prefixes, e.g. 999 -> "999 B", 1500 -> "1.5 kB", 1000000 -> "1.0 MB".
func byteCountSI(b int64) string {
	const (
		unit     = 1000
		prefixes = "kMGTPE"
	)
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Find the largest power of unit that does not exceed b.
	divisor := int64(unit)
	prefixIdx := 0
	rest := b / unit
	for rest >= unit {
		divisor *= unit
		prefixIdx++
		rest /= unit
	}
	scaled := float64(b) / float64(divisor)
	return fmt.Sprintf("%.1f %cB", scaled, prefixes[prefixIdx])
}
|
||||
69
app/vmctl/vm/vm_test.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package vm
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestAddExtraLabelsToImportPath(t *testing.T) {
|
||||
type args struct {
|
||||
path string
|
||||
extraLabels []string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "ok w/o extra labels",
|
||||
args: args{
|
||||
path: "/api/v1/import",
|
||||
},
|
||||
want: "/api/v1/import",
|
||||
},
|
||||
{
|
||||
name: "ok one extra label",
|
||||
args: args{
|
||||
path: "/api/v1/import",
|
||||
extraLabels: []string{"instance=host-1"},
|
||||
},
|
||||
want: "/api/v1/import?extra_label=instance=host-1",
|
||||
},
|
||||
{
|
||||
name: "ok two extra labels",
|
||||
args: args{
|
||||
path: "/api/v1/import",
|
||||
extraLabels: []string{"instance=host-2", "job=vmagent"},
|
||||
},
|
||||
want: "/api/v1/import?extra_label=instance=host-2&extra_label=job=vmagent",
|
||||
},
|
||||
{
|
||||
name: "ok two extra with exist param",
|
||||
args: args{
|
||||
path: "/api/v1/import?timeout=50",
|
||||
extraLabels: []string{"instance=host-2", "job=vmagent"},
|
||||
},
|
||||
want: "/api/v1/import?timeout=50&extra_label=instance=host-2&extra_label=job=vmagent",
|
||||
},
|
||||
{
|
||||
name: "bad incorrect format for extra label",
|
||||
args: args{
|
||||
path: "/api/v1/import",
|
||||
extraLabels: []string{"label=value", "bad_label_wo_value"},
|
||||
},
|
||||
want: "/api/v1/import",
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := AddExtraLabelsToImportPath(tt.args.path, tt.args.extraLabels)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("AddExtraLabelsToImportPath() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Errorf("AddExtraLabelsToImportPath() got = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
143
app/vmctl/vm_native.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
)
|
||||
|
||||
type vmNativeProcessor struct {
|
||||
filter filter
|
||||
|
||||
dst *vmNativeClient
|
||||
src *vmNativeClient
|
||||
}
|
||||
|
||||
// vmNativeClient holds the connection settings of one side
// (source or destination) of the native migration.
type vmNativeClient struct {
	addr string
	// user and password are sent as basic auth when user is not empty.
	user, password string
	// extraLabels are appended to the import path as extra_label params.
	extraLabels []string
}
|
||||
|
||||
// filter describes which data to export: a series selector
// and optional time range bounds.
type filter struct {
	match              string
	timeStart, timeEnd string
}

// String returns a multi-line, user-readable description of f.
// The start and end lines are included only when the bounds are set.
func (f filter) String() string {
	out := fmt.Sprintf("\n\tfilter: match[]=%s", f.match)

	optional := []struct{ format, value string }{
		{"\n\tstart: %s", f.timeStart},
		{"\n\tend: %s", f.timeEnd},
	}
	for _, part := range optional {
		if part.value == "" {
			continue
		}
		out += fmt.Sprintf(part.format, part.value)
	}
	return out
}
|
||||
|
||||
// Paths of the native export/import API, relative to the instance address.
const (
	nativeExportAddr = "api/v1/export/native"
	nativeImportAddr = "api/v1/import/native"
)

// barTpl is the progress bar template used while copying data in run.
const barTpl = `Total: {{counters . }} {{ cycle . "↖" "↗" "↘" "↙" }} Speed: {{speed . }} {{string . "suffix"}}`
|
||||
|
||||
func (p *vmNativeProcessor) run() error {
|
||||
pr, pw := io.Pipe()
|
||||
|
||||
fmt.Printf("Initing export pipe from %q with filters: %s\n", p.src.addr, p.filter)
|
||||
exportReader, err := p.exportPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init export pipe: %s", err)
|
||||
}
|
||||
|
||||
sync := make(chan struct{})
|
||||
nativeImportAddr, err := vm.AddExtraLabelsToImportPath(nativeImportAddr, p.dst.extraLabels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go func() {
|
||||
defer func() { close(sync) }()
|
||||
u := fmt.Sprintf("%s/%s", p.dst.addr, nativeImportAddr)
|
||||
req, err := http.NewRequest("POST", u, pr)
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create import request to %q: %s", p.dst.addr, err)
|
||||
}
|
||||
importResp, err := p.dst.do(req, http.StatusNoContent)
|
||||
if err != nil {
|
||||
log.Fatalf("import request failed: %s", err)
|
||||
}
|
||||
if err := importResp.Body.Close(); err != nil {
|
||||
log.Fatalf("cannot close import response body: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
fmt.Printf("Initing import process to %q:\n", p.dst.addr)
|
||||
bar := pb.ProgressBarTemplate(barTpl).Start64(0)
|
||||
barReader := bar.NewProxyReader(exportReader)
|
||||
|
||||
_, err = io.Copy(pw, barReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write into %q: %s", p.dst.addr, err)
|
||||
}
|
||||
if err := pw.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
<-sync
|
||||
|
||||
bar.Finish()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *vmNativeProcessor) exportPipe() (io.ReadCloser, error) {
|
||||
u := fmt.Sprintf("%s/%s", p.src.addr, nativeExportAddr)
|
||||
req, err := http.NewRequest("GET", u, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create request to %q: %s", p.src.addr, err)
|
||||
}
|
||||
|
||||
params := req.URL.Query()
|
||||
params.Set("match[]", p.filter.match)
|
||||
if p.filter.timeStart != "" {
|
||||
params.Set("start", p.filter.timeStart)
|
||||
}
|
||||
if p.filter.timeEnd != "" {
|
||||
params.Set("end", p.filter.timeEnd)
|
||||
}
|
||||
req.URL.RawQuery = params.Encode()
|
||||
|
||||
// disable compression since it is meaningless for native format
|
||||
req.Header.Set("Accept-Encoding", "identity")
|
||||
resp, err := p.src.do(req, http.StatusOK)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("export request failed: %s", err)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
func (c *vmNativeClient) do(req *http.Request, expSC int) (*http.Response, error) {
|
||||
if c.user != "" {
|
||||
req.SetBasicAuth(c.user, c.password)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected error when performing request: %s", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != expSC {
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body for status code %d: %s", resp.StatusCode, err)
|
||||
}
|
||||
return nil, fmt.Errorf("unexpected response code %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
287
app/vmgateway/README.md
Normal file
@@ -0,0 +1,287 @@
|
||||
# vmgateway
|
||||
|
||||
|
||||
<img alt="vmgateway" src="vmgateway-overview.jpeg">
|
||||
|
||||
`vmgateway` is a proxy for Victoria Metrics TSDB. It provides the following features:
|
||||
|
||||
* Rate Limiter
|
||||
* Based on cluster tenants' utilization supports multiple time interval limits for ingestion/retrieving metrics
|
||||
* Token Access Control
|
||||
* Supports additional per-label access control for Single and Cluster versions of Victoria Metrics TSDB
|
||||
* Provides access by tenantID at Cluster version
|
||||
* Allows to separate write/read/admin access to data
|
||||
|
||||
`vmgateway` is included in an [enterprise package](https://victoriametrics.com/enterprise.html).
|
||||
|
||||
|
||||
## Access Control
|
||||
|
||||
<img alt="vmgateway-ac" src="vmgateway-access-control.jpg">
|
||||
|
||||
`vmgateway` supports JWT-based authentication. The JWT payload can be used to configure access to a specific tenant, labels, and read/write mode.
|
||||
|
||||
The JWT token must be in the following format:
|
||||
```json
|
||||
{
|
||||
"exp": 1617304574,
|
||||
"vm_access": {
|
||||
"tenant_id": {
|
||||
"account_id": 1,
|
||||
"project_id": 5
|
||||
},
|
||||
"extra_labels": {
|
||||
"team": "dev",
|
||||
"project": "mobile"
|
||||
},
|
||||
"mode": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
Where:
|
||||
- `exp` - required, expire time in unix_timestamp. If token expires, `vmgateway` rejects request.
|
||||
- `vm_access` - required, dict with claim info, minimum form: `{"vm_access": {"tenant_id": {}}}`
|
||||
- `tenant_id` - optional, makes sense only for cluster mode, routes request to corresponding tenant.
|
||||
- `extra_labels` - optional, key-value pairs for label filters - added to ingested or selected metrics.
|
||||
- `mode` - optional, access mode for api - read, write, full. supported values: 0 - full (default value), 1 - read, 2 - write.
|
||||
|
||||
## QuickStart
|
||||
|
||||
Start single version of Victoria Metrics
|
||||
|
||||
```bash
|
||||
# single
|
||||
# start node
|
||||
./bin/victoria-metrics --selfScrapeInterval=10s
|
||||
```
|
||||
|
||||
Start vmgateway
|
||||
|
||||
```bash
|
||||
./bin/vmgateway -eula -enable.auth -read.url http://localhost:8428 --write.url http://localhost:8428
|
||||
```
|
||||
|
||||
Retrieve data from database
|
||||
```bash
|
||||
curl 'http://localhost:8431/api/v1/series/count' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ2bV9hY2Nlc3MiOnsidGVuYW50X2lkIjp7fSwicm9sZSI6MX0sImV4cCI6MTkzOTM0NjIxMH0.5WUxEfdcV9hKo4CtQdtuZYOGpGXWwaqM9VuVivMMrVg'
|
||||
```
|
||||
|
||||
A request with an incorrect token or without a token will be rejected:
|
||||
```bash
|
||||
curl 'http://localhost:8431/api/v1/series/count'
|
||||
|
||||
curl 'http://localhost:8431/api/v1/series/count' -H 'Authorization: Bearer incorrect-token'
|
||||
```
|
||||
|
||||
|
||||
## Rate Limiter
|
||||
|
||||
<img alt="vmgateway-rl" src="vmgateway-rate-limiting.jpg">
|
||||
|
||||
Limits incoming requests by given pre-configured limits. It supports read and write limiting by a tenant.
|
||||
|
||||
`vmgateway` needs datasource for rate limits queries. It can be single-node or cluster version of `victoria-metrics`.
|
||||
It must have metrics scraped from the cluster that you want to rate limit.
|
||||
|
||||
List of supported limit types:
|
||||
- `queries` - count of api requests made at tenant to read api, such as `/api/v1/query`, `/api/v1/series` and others.
|
||||
- `active_series` - count of current active series at given tenant.
|
||||
- `new_series` - count of created series aka churn rate
|
||||
- `rows_inserted` - count of inserted rows per tenant.
|
||||
|
||||
List of supported time windows:
|
||||
- `minute`
|
||||
- `hour`
|
||||
|
||||
Limits can be specified per tenant or at global level, if you omit `project_id` and `account_id`.
|
||||
|
||||
Example of configuration file:
|
||||
|
||||
```yaml
|
||||
limits:
|
||||
- type: queries
|
||||
value: 1000
|
||||
resolution: minute
|
||||
- type: queries
|
||||
value: 10000
|
||||
resolution: hour
|
||||
- type: queries
|
||||
value: 10
|
||||
resolution: minute
|
||||
project_id: 5
|
||||
account_id: 1
|
||||
```
|
||||
|
||||
## QuickStart
|
||||
|
||||
The cluster version is required for rate limiting.
|
||||
```bash
|
||||
# start datasource for cluster metrics
|
||||
|
||||
cat << EOF > cluster.yaml
|
||||
scrape_configs:
|
||||
- job_name: cluster
|
||||
scrape_interval: 5s
|
||||
static_configs:
|
||||
- targets: ['127.0.0.1:8481','127.0.0.1:8482','127.0.0.1:8480']
|
||||
EOF
|
||||
|
||||
./bin/victoria-metrics --promscrape.config cluster.yaml
|
||||
|
||||
# start cluster
|
||||
|
||||
# start vmstorage, vmselect and vminsert
|
||||
./bin/vmstorage -eula
|
||||
./bin/vmselect -eula -storageNode 127.0.0.1:8401
|
||||
./bin/vminsert -eula -storageNode 127.0.0.1:8400
|
||||
|
||||
# create base rate limiting config:
|
||||
cat << EOF > limit.yaml
|
||||
limits:
|
||||
- type: queries
|
||||
value: 100
|
||||
- type: rows_inserted
|
||||
value: 100000
|
||||
- type: new_series
|
||||
value: 1000
|
||||
- type: active_series
|
||||
value: 100000
|
||||
- type: queries
|
||||
value: 1
|
||||
account_id: 15
|
||||
EOF
|
||||
|
||||
# start gateway with clusterMode
|
||||
./bin/vmgateway -eula -enable.rateLimit -ratelimit.config limit.yaml -datasource.url http://localhost:8428 -enable.auth -clusterMode -write.url=http://localhost:8480 --read.url=http://localhost:8481
|
||||
|
||||
# ingest simple metric to tenant 1:5
|
||||
curl 'http://localhost:8431/api/v1/import/prometheus' -X POST -d 'foo{bar="baz1"} 123' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MjAxNjIwMDAwMDAsInZtX2FjY2VzcyI6eyJ0ZW5hbnRfaWQiOnsiYWNjb3VudF9pZCI6MTV9fX0.PB1_KXDKPUp-40pxOGk6lt_jt9Yq80PIMpWVJqSForQ'
|
||||
# read metric from tenant 1:5
|
||||
curl 'http://localhost:8431/api/v1/labels' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MjAxNjIwMDAwMDAsInZtX2FjY2VzcyI6eyJ0ZW5hbnRfaWQiOnsiYWNjb3VudF9pZCI6MTV9fX0.PB1_KXDKPUp-40pxOGk6lt_jt9Yq80PIMpWVJqSForQ'
|
||||
|
||||
# check rate limit
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
```bash
|
||||
-clusterMode
|
||||
enable it for cluster version
|
||||
-datasource.appendTypePrefix
|
||||
Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to VMSelect URL.
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.lookback duration
|
||||
Lookback defines how far to look into past when evaluating queries. For example, if datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
-datasource.maxIdleConnections int
|
||||
Defines the number of idle (keep-alive connections) to configured datasource.Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state. (default 100)
|
||||
-datasource.queryStep duration
|
||||
queryStep defines how far a value can fallback to when evaluating queries. For example, if datasource.queryStep=15s then param "step" with value "15s" will be added to every query.
|
||||
-datasource.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used
|
||||
-datasource.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url
|
||||
-datasource.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url
|
||||
-datasource.tlsServerName string
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
-enable.auth
|
||||
enables auth with jwt token
|
||||
-enable.rateLimit
|
||||
enables rate limiter
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
By specifying this flag you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8431")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-ratelimit.config string
|
||||
path for configuration file
|
||||
-ratelimit.extraLabels array
|
||||
additional labels that will be applied when fetching data from the datasource
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-ratelimit.refreshInterval duration
|
||||
(default 5s)
|
||||
-read.url string
|
||||
read access url address, example: http://vmselect:8481
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
-write.url string
|
||||
write access url address, example: http://vminsert:8480
|
||||
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* Access control:
|
||||
* incorrect `jwt` format, try https://jwt.io/#debugger-io with your tokens
|
||||
* expired token, check `exp` field.
|
||||
* Rate Limiting:
|
||||
* `scrape_interval` at datasource, reduce it to apply limits faster.
|
||||
|
||||
|
||||
## Limitations
|
||||
|
||||
* Access Control:
|
||||
* `jwt` token must be validated by external system, currently `vmgateway` can't validate the signature.
|
||||
* Rate Limiting:
|
||||
* limits applied based on queries to `datasource.url`
|
||||
* only cluster version can be rate-limited.
|
||||
BIN
app/vmgateway/vmgateway-access-control.jpg
Normal file
|
After Width: | Height: | Size: 40 KiB |
BIN
app/vmgateway/vmgateway-overview.jpeg
Normal file
|
After Width: | Height: | Size: 48 KiB |
BIN
app/vmgateway/vmgateway-rate-limiting.jpg
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
app/vmgateway/vmgateway.png
Normal file
|
After Width: | Height: | Size: 48 KiB |
@@ -14,7 +14,7 @@ import (
|
||||
|
||||
// InsertCtx contains common bits for data points insertion.
|
||||
type InsertCtx struct {
|
||||
Labels []prompb.Label
|
||||
Labels sortedLabels
|
||||
|
||||
mrs []storage.MetricRow
|
||||
metricNamesBuf []byte
|
||||
|
||||