mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-07 19:06:17 +03:00
Compare commits
878 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c568b13b2 | ||
|
|
7388479a07 | ||
|
|
157c02622b | ||
|
|
4068f8d590 | ||
|
|
bd11fd8f1d | ||
|
|
b577cdd855 | ||
|
|
b39d5ef656 | ||
|
|
8164cd8932 | ||
|
|
b43b498fd8 | ||
|
|
5d87dbfd65 | ||
|
|
195341a7cf | ||
|
|
f0087f0dbb | ||
|
|
a4ae945a79 | ||
|
|
b2aa80e74b | ||
|
|
29a7067827 | ||
|
|
d5c180e680 | ||
|
|
2a7b1cc668 | ||
|
|
929f09b90d | ||
|
|
d6347a3e56 | ||
|
|
fc5b26d856 | ||
|
|
de3c662e8a | ||
|
|
3149ac7a7e | ||
|
|
419ad74269 | ||
|
|
3fe848cdd7 | ||
|
|
5481906db6 | ||
|
|
cc3e69e963 | ||
|
|
8cea3c3cc4 | ||
|
|
c164a8d231 | ||
|
|
3caac3d12c | ||
|
|
054fe1c198 | ||
|
|
0a45220b0a | ||
|
|
8749c2dd92 | ||
|
|
011c5da785 | ||
|
|
fcbefc15d0 | ||
|
|
485d43ef21 | ||
|
|
b638c1eed5 | ||
|
|
cc379f95c2 | ||
|
|
689d769b4d | ||
|
|
0d03855787 | ||
|
|
75f7c51cab | ||
|
|
184a659c5f | ||
|
|
7ce87ebcb2 | ||
|
|
1051d8aa2d | ||
|
|
689cf88eb2 | ||
|
|
bdd0a1cdb2 | ||
|
|
acf1a2c72b | ||
|
|
89315d719d | ||
|
|
dc9d7aedd5 | ||
|
|
7373986f9e | ||
|
|
7bf5d48315 | ||
|
|
3e451ccdda | ||
|
|
fe3444b124 | ||
|
|
77be066ee8 | ||
|
|
1837f2f7d3 | ||
|
|
f5d52b51f1 | ||
|
|
31ec79eaf6 | ||
|
|
c8ea697db8 | ||
|
|
2140ccbdcc | ||
|
|
7976c22797 | ||
|
|
2c44f9989a | ||
|
|
e61e3bf174 | ||
|
|
89611fa48c | ||
|
|
14f0f90507 | ||
|
|
24ffad74c1 | ||
|
|
6740294ebb | ||
|
|
2e2e4f7e21 | ||
|
|
9dcb18e03d | ||
|
|
0477991b4d | ||
|
|
b1f9b39c4b | ||
|
|
39b11b3ff4 | ||
|
|
7bd420cbfe | ||
|
|
85962b459f | ||
|
|
f6ca776c75 | ||
|
|
70df5f4975 | ||
|
|
c86286ec1d | ||
|
|
261535b32d | ||
|
|
4b7105a65b | ||
|
|
df0309eae0 | ||
|
|
ad4e6a9283 | ||
|
|
59183f66d0 | ||
|
|
fb338c50a3 | ||
|
|
86630350bf | ||
|
|
490c69c64e | ||
|
|
932e53522d | ||
|
|
1de15ad490 | ||
|
|
1f2944a9d0 | ||
|
|
cab7e936a3 | ||
|
|
0326638c90 | ||
|
|
4eb520a342 | ||
|
|
b21e16ad0c | ||
|
|
820669da69 | ||
|
|
8dd03ecf19 | ||
|
|
9e4ed5e591 | ||
|
|
9df60518bb | ||
|
|
c270f8f3e6 | ||
|
|
46dba00756 | ||
|
|
de89bcddae | ||
|
|
0f99c1afb1 | ||
|
|
750daa04d1 | ||
|
|
e4f856e900 | ||
|
|
e15b20dde3 | ||
|
|
13804bda8f | ||
|
|
404cbd1522 | ||
|
|
88ac4dfc07 | ||
|
|
17c2ce18fd | ||
|
|
d65c03c004 | ||
|
|
ebf8da3730 | ||
|
|
e6666da4e7 | ||
|
|
97686ddc65 | ||
|
|
43577a8237 | ||
|
|
8df25e12d8 | ||
|
|
d8197f4a55 | ||
|
|
8aa2f448a8 | ||
|
|
2dfa746c91 | ||
|
|
9abb2d6c74 | ||
|
|
27f0261257 | ||
|
|
2a1550f341 | ||
|
|
0d2c4f252f | ||
|
|
0e082b1c76 | ||
|
|
1b9992b42a | ||
|
|
795e32be4a | ||
|
|
4215182e61 | ||
|
|
e8f645bf52 | ||
|
|
a4c7fcb5e1 | ||
|
|
aa56b9217e | ||
|
|
b10ad44692 | ||
|
|
1eabbc0e27 | ||
|
|
a13a443bf7 | ||
|
|
b9913e151a | ||
|
|
b730fc2667 | ||
|
|
11fa458e39 | ||
|
|
149511f5e9 | ||
|
|
2813d0b1e0 | ||
|
|
95c9b630cc | ||
|
|
60fcac4878 | ||
|
|
5af2a9ca0e | ||
|
|
020917949b | ||
|
|
4e48067133 | ||
|
|
ae3675d3d0 | ||
|
|
6247884057 | ||
|
|
0b2726c3be | ||
|
|
5d426dfe0a | ||
|
|
d006b41eff | ||
|
|
ae972429c7 | ||
|
|
f8e7f433cf | ||
|
|
069c9ade52 | ||
|
|
ce8c2dd1f1 | ||
|
|
5ebfc275e6 | ||
|
|
f93247e82d | ||
|
|
c4c90ab2b1 | ||
|
|
ae10ff8ccd | ||
|
|
4862edfef3 | ||
|
|
9d42546a27 | ||
|
|
710f8ce5aa | ||
|
|
081aa4ad68 | ||
|
|
5f9d88a3cb | ||
|
|
ba8ac08739 | ||
|
|
e7d8d84396 | ||
|
|
30445ed5e9 | ||
|
|
82afcb6d0d | ||
|
|
3ca1ed0fde | ||
|
|
b13680a67e | ||
|
|
0066a02293 | ||
|
|
fd9fd191b9 | ||
|
|
4146fc4668 | ||
|
|
364f30a6e7 | ||
|
|
1906f841c9 | ||
|
|
26df320be5 | ||
|
|
b6b1b06d70 | ||
|
|
5454668709 | ||
|
|
c8133cbb16 | ||
|
|
30deb2b548 | ||
|
|
08b71d2067 | ||
|
|
0f1b969aa6 | ||
|
|
c7ac7c1807 | ||
|
|
05813259dc | ||
|
|
9c1c9d8e76 | ||
|
|
007dbf273d | ||
|
|
82972a8f2a | ||
|
|
83c0c241a7 | ||
|
|
299a35948c | ||
|
|
b0e4b234cb | ||
|
|
6f0038209c | ||
|
|
ae1db8fa08 | ||
|
|
0e46e8df8d | ||
|
|
d305cc2017 | ||
|
|
e2e8ef86d9 | ||
|
|
52915c8f7e | ||
|
|
eb27dbde13 | ||
|
|
9d787f9edd | ||
|
|
66379cc69f | ||
|
|
d0e1589ea9 | ||
|
|
de0643fab5 | ||
|
|
9cd8eb92f1 | ||
|
|
5009b25a03 | ||
|
|
c6dee6c52d | ||
|
|
a7fc84b390 | ||
|
|
2f777d996d | ||
|
|
44a34a0f5f | ||
|
|
4910bac46b | ||
|
|
1982505c2b | ||
|
|
9d87496b50 | ||
|
|
91a4c279cc | ||
|
|
7590b8477b | ||
|
|
b1fd390e16 | ||
|
|
5bf14991a3 | ||
|
|
700bda8e2e | ||
|
|
efdc3c71af | ||
|
|
ca091bade3 | ||
|
|
b35b3dc043 | ||
|
|
0463cb5550 | ||
|
|
357f886f97 | ||
|
|
ace969d595 | ||
|
|
32869e4c0f | ||
|
|
a906b3862f | ||
|
|
eedb79ead8 | ||
|
|
ae457828bc | ||
|
|
51652f638f | ||
|
|
3a32789352 | ||
|
|
2cea4d403f | ||
|
|
3dffc6099e | ||
|
|
b0a5c382ee | ||
|
|
1de1774de6 | ||
|
|
067188501f | ||
|
|
4cb6bcd2d7 | ||
|
|
6b1317b6a4 | ||
|
|
b7fcdb528d | ||
|
|
dabbf930d8 | ||
|
|
1c669a69a8 | ||
|
|
7119f294f3 | ||
|
|
8a057e705a | ||
|
|
b65236530c | ||
|
|
ae04378424 | ||
|
|
bf95fbfc1d | ||
|
|
78d2715d04 | ||
|
|
d0ffb49ee2 | ||
|
|
b7f4fc6e0d | ||
|
|
d48363534a | ||
|
|
0acdab3ab9 | ||
|
|
7e8dcf9ddc | ||
|
|
aa90b93778 | ||
|
|
de523c81b9 | ||
|
|
a724dde90a | ||
|
|
fb8e56d8a2 | ||
|
|
f0c207fae2 | ||
|
|
d3794eb994 | ||
|
|
f765985947 | ||
|
|
e614a14b21 | ||
|
|
9d160f9048 | ||
|
|
d7932775cc | ||
|
|
eec76718e9 | ||
|
|
093a891762 | ||
|
|
c03e4ef9d6 | ||
|
|
de7f315231 | ||
|
|
97a0c80904 | ||
|
|
09105ff49c | ||
|
|
2859a452d4 | ||
|
|
170e2f54ab | ||
|
|
8b116b619a | ||
|
|
6e6d62284c | ||
|
|
a02a12f639 | ||
|
|
f818ab497b | ||
|
|
b73802372a | ||
|
|
2f05f90888 | ||
|
|
7e4bcbd853 | ||
|
|
a11659013f | ||
|
|
a6b2b2c005 | ||
|
|
c2afa3fdd7 | ||
|
|
d4cc934c77 | ||
|
|
870270c75e | ||
|
|
7addbfc831 | ||
|
|
1c477bc2fc | ||
|
|
d57214244d | ||
|
|
84b986b2fc | ||
|
|
1052effb6d | ||
|
|
266788be14 | ||
|
|
cf18df367d | ||
|
|
72ab3f7230 | ||
|
|
30a922f383 | ||
|
|
2c67232565 | ||
|
|
86f99c6b55 | ||
|
|
3c1434118e | ||
|
|
27a417bcd3 | ||
|
|
6fa806f1ca | ||
|
|
f5500251d9 | ||
|
|
5d6d2ef3a6 | ||
|
|
0208d8c103 | ||
|
|
465923b181 | ||
|
|
a1f3795b78 | ||
|
|
414cd39659 | ||
|
|
d100341394 | ||
|
|
6251762787 | ||
|
|
48d033a198 | ||
|
|
4aaee33860 | ||
|
|
6c0d36e4a9 | ||
|
|
ef9a8989fd | ||
|
|
5d27642106 | ||
|
|
0deabbbb4a | ||
|
|
67b41c080d | ||
|
|
6fcbd17bdd | ||
|
|
9ce5c0c33f | ||
|
|
c5daf8a27b | ||
|
|
d9d01f976b | ||
|
|
1f19c167a4 | ||
|
|
cdf1e6684b | ||
|
|
28ea993872 | ||
|
|
149c0c4a6d | ||
|
|
4f8a3af061 | ||
|
|
57a4af98fa | ||
|
|
3fa9ab4a49 | ||
|
|
47a038401b | ||
|
|
077f8cbe1c | ||
|
|
4057305148 | ||
|
|
bb06b98202 | ||
|
|
4adb96161a | ||
|
|
4c8e01b312 | ||
|
|
51c529a2b6 | ||
|
|
1437d6db0c | ||
|
|
e60c0d0bae | ||
|
|
462913ed2f | ||
|
|
1e69c151eb | ||
|
|
348edd92fe | ||
|
|
352485b0de | ||
|
|
9e40eec7d8 | ||
|
|
e205975716 | ||
|
|
6e668fd480 | ||
|
|
47390d8947 | ||
|
|
ba4a2c8bca | ||
|
|
0d7a3f4eb3 | ||
|
|
fc499ab501 | ||
|
|
3adf8c5a6f | ||
|
|
0d1855f661 | ||
|
|
bcd139362b | ||
|
|
6c24c5caa3 | ||
|
|
ef6ab3d2c9 | ||
|
|
41813eb87a | ||
|
|
4e391a5e39 | ||
|
|
bb3b513bdd | ||
|
|
83df20b5b5 | ||
|
|
9e83335ca9 | ||
|
|
5407eed2f6 | ||
|
|
188325f0fc | ||
|
|
55e98e265e | ||
|
|
dbbc160a40 | ||
|
|
9c0e2d2a6e | ||
|
|
82ce930e59 | ||
|
|
dd6bfa50e9 | ||
|
|
43823addea | ||
|
|
5943f49f60 | ||
|
|
9deda5107b | ||
|
|
07f7245aeb | ||
|
|
944c5ea331 | ||
|
|
de81472724 | ||
|
|
f733b0ac9d | ||
|
|
368b69b4c4 | ||
|
|
1cb78ba1a0 | ||
|
|
b378cd6ed8 | ||
|
|
381ad564a2 | ||
|
|
4c808d58bf | ||
|
|
c4e8c34d0e | ||
|
|
b2042a1c30 | ||
|
|
caeb74f068 | ||
|
|
ae91a6883c | ||
|
|
e4182dd896 | ||
|
|
b9e5172aa2 | ||
|
|
600f225cff | ||
|
|
bd81f926a4 | ||
|
|
5a9743211f | ||
|
|
ca8b5745b5 | ||
|
|
f3f62ab04e | ||
|
|
e0a91ef163 | ||
|
|
c87fb9191e | ||
|
|
51e661ecfe | ||
|
|
cd071357d8 | ||
|
|
61579680bb | ||
|
|
fe289331dd | ||
|
|
d396c265a6 | ||
|
|
31918f60b2 | ||
|
|
d62ec1cb01 | ||
|
|
5e75c389e6 | ||
|
|
c0f3be824d | ||
|
|
ca566dce39 | ||
|
|
0b35da159c | ||
|
|
cb71af216a | ||
|
|
daacbc7e34 | ||
|
|
f477cbe861 | ||
|
|
50d44d5932 | ||
|
|
68d004bc05 | ||
|
|
e277c3d07b | ||
|
|
29e4e7f422 | ||
|
|
b7638f04a7 | ||
|
|
c539494b36 | ||
|
|
d12c4914f0 | ||
|
|
64e2d66014 | ||
|
|
4108e85efd | ||
|
|
f0bdc5716e | ||
|
|
67059caa12 | ||
|
|
de3fe22815 | ||
|
|
055f152246 | ||
|
|
20311f6065 | ||
|
|
a51a7b2a20 | ||
|
|
bca468bb55 | ||
|
|
0729cc36b2 | ||
|
|
5bfd4e6218 | ||
|
|
920300643a | ||
|
|
ef77120170 | ||
|
|
b3f3c078e5 | ||
|
|
84e3881c0b | ||
|
|
2ed069c3bc | ||
|
|
28353e48ca | ||
|
|
01987f8c77 | ||
|
|
d2960a20e0 | ||
|
|
d4f12e0fbb | ||
|
|
e6ab69dd88 | ||
|
|
ed5f05024b | ||
|
|
43aa737e23 | ||
|
|
46dccc1088 | ||
|
|
96cdfcba50 | ||
|
|
09d60d64a9 | ||
|
|
c37e5de66f | ||
|
|
3b847d32d9 | ||
|
|
590d8d537f | ||
|
|
bc42b5598f | ||
|
|
94978af9bc | ||
|
|
8e20bc7b53 | ||
|
|
a2b9476897 | ||
|
|
9aa3b65766 | ||
|
|
d8af290947 | ||
|
|
1e27420243 | ||
|
|
4f16a964e3 | ||
|
|
4cc6574cea | ||
|
|
63c4999e06 | ||
|
|
9bd9f67718 | ||
|
|
7f983d461a | ||
|
|
3bba6a2199 | ||
|
|
762c967855 | ||
|
|
45f7cdc532 | ||
|
|
a94825b169 | ||
|
|
f7d28bddbf | ||
|
|
2749a3c827 | ||
|
|
b449607181 | ||
|
|
cf5f2874cd | ||
|
|
272d6976b3 | ||
|
|
68f0e00761 | ||
|
|
84227ea2fc | ||
|
|
f4e8687c88 | ||
|
|
561a7619a5 | ||
|
|
6105d61d11 | ||
|
|
12d2cf3a7a | ||
|
|
71ea4935de | ||
|
|
9b0a5c1028 | ||
|
|
d423d73251 | ||
|
|
d8546e972a | ||
|
|
c9fb217e4e | ||
|
|
bec85d5135 | ||
|
|
e9f2e2cbc9 | ||
|
|
5ef71974fe | ||
|
|
92e5d89fc9 | ||
|
|
8e6eb2cd6b | ||
|
|
af90b3121c | ||
|
|
e9d99021b0 | ||
|
|
5aa269def6 | ||
|
|
d16dbfd639 | ||
|
|
cfd720e772 | ||
|
|
e10c484a8e | ||
|
|
2a6fa53957 | ||
|
|
5a8553bfd2 | ||
|
|
e19d400230 | ||
|
|
90aa2a8ffd | ||
|
|
cc08648699 | ||
|
|
129b07113e | ||
|
|
aba899c298 | ||
|
|
991fad7855 | ||
|
|
cbe3cf683b | ||
|
|
f42194d817 | ||
|
|
bbeac0ba46 | ||
|
|
47db9bb24a | ||
|
|
bc7d67cee2 | ||
|
|
59c26feefa | ||
|
|
764dc2499f | ||
|
|
10f2eedee0 | ||
|
|
d25dd7fdb6 | ||
|
|
daa2d1c065 | ||
|
|
a44e0c6153 | ||
|
|
a897cf2ec3 | ||
|
|
58465bb29b | ||
|
|
e59de98384 | ||
|
|
bec9b31b81 | ||
|
|
44bcda81ab | ||
|
|
a9db81c4ab | ||
|
|
dbf9402329 | ||
|
|
1137bdec66 | ||
|
|
127537d631 | ||
|
|
f7636b0342 | ||
|
|
76b244cfcf | ||
|
|
7dc67cd883 | ||
|
|
efdefbc1cb | ||
|
|
1659135752 | ||
|
|
9945b8c98d | ||
|
|
1e679f3e0d | ||
|
|
38789e4aa0 | ||
|
|
19c0b6f3ef | ||
|
|
7cde336b33 | ||
|
|
96ee276e6e | ||
|
|
6fdfc67620 | ||
|
|
165c9c6371 | ||
|
|
41f24cdb64 | ||
|
|
7673839228 | ||
|
|
cc90a548b1 | ||
|
|
8d5df13c7c | ||
|
|
7500146321 | ||
|
|
124f78857b | ||
|
|
978c6b4ba9 | ||
|
|
5cdad60a6f | ||
|
|
1b3efccb24 | ||
|
|
95688cbfc5 | ||
|
|
b4bf722d8f | ||
|
|
c00627c103 | ||
|
|
b6a976b98d | ||
|
|
82973f8ae7 | ||
|
|
bab6a15ae0 | ||
|
|
23bdc1f107 | ||
|
|
24ca30bf66 | ||
|
|
c584aece38 | ||
|
|
2985077c35 | ||
|
|
30c7269814 | ||
|
|
27500d7d4c | ||
|
|
3ba507000c | ||
|
|
c5ef0e6327 | ||
|
|
bed25e3c24 | ||
|
|
5c42965853 | ||
|
|
09b0f7c202 | ||
|
|
36eb5427eb | ||
|
|
31ce0e29cd | ||
|
|
3b1e3a03e0 | ||
|
|
a69234ed18 | ||
|
|
be5e1222f3 | ||
|
|
94f7d00537 | ||
|
|
f6f5c4118c | ||
|
|
00b5145c69 | ||
|
|
2eb72e09ab | ||
|
|
29108cc53e | ||
|
|
964bc7595c | ||
|
|
312fead9a2 | ||
|
|
1e1a27d803 | ||
|
|
9739283dad | ||
|
|
5dffc7a553 | ||
|
|
82c3bbce34 | ||
|
|
3e8569f456 | ||
|
|
f00e0e0103 | ||
|
|
26115891db | ||
|
|
d50165ad59 | ||
|
|
63d3c88c3b | ||
|
|
1a9ee39b0e | ||
|
|
70c721c01b | ||
|
|
74e3198281 | ||
|
|
98d1cd0971 | ||
|
|
7a134b0fd7 | ||
|
|
1f33dd717f | ||
|
|
8beb0da6ad | ||
|
|
067d7c1ea1 | ||
|
|
020bd8685e | ||
|
|
f2a449983d | ||
|
|
8674963f6a | ||
|
|
ab53cb6f7b | ||
|
|
9f79bcf64a | ||
|
|
39dee12ed7 | ||
|
|
d455764a6f | ||
|
|
ffadf035fa | ||
|
|
d8183c3124 | ||
|
|
9bc8484ab6 | ||
|
|
26fa94ba8d | ||
|
|
0bccb58e80 | ||
|
|
1fec47a289 | ||
|
|
8c3d7c1a59 | ||
|
|
fa01169c3d | ||
|
|
51598bd718 | ||
|
|
ba74d0c14c | ||
|
|
7d893a234c | ||
|
|
0e533d1a9c | ||
|
|
0e19f35af5 | ||
|
|
6ad6480400 | ||
|
|
4cdffb04a4 | ||
|
|
ca856284e4 | ||
|
|
62fde80490 | ||
|
|
5a90a92378 | ||
|
|
a2f647d142 | ||
|
|
f95eea60d1 | ||
|
|
2380e9b017 | ||
|
|
f0005c3007 | ||
|
|
2114179e19 | ||
|
|
6c80ae0da8 | ||
|
|
204ec415b4 | ||
|
|
8a8b5a73d3 | ||
|
|
c9d0905b17 | ||
|
|
f6bc608e86 | ||
|
|
3eccecd5fd | ||
|
|
b3dcaf0cd7 | ||
|
|
9d8fdff6c5 | ||
|
|
d7c04db1fc | ||
|
|
e5ed8c8d75 | ||
|
|
9d431a4b45 | ||
|
|
4739dff6f0 | ||
|
|
11eaa37111 | ||
|
|
df169b1ebd | ||
|
|
9d61d24142 | ||
|
|
62919eaf7e | ||
|
|
e6da63dffe | ||
|
|
8e85b56737 | ||
|
|
c0343a661b | ||
|
|
1bca6160a3 | ||
|
|
ccfb7c5e29 | ||
|
|
8d71a60a76 | ||
|
|
eb33a48b9b | ||
|
|
cd7426be6e | ||
|
|
a5621b9c46 | ||
|
|
be6ae4b5e7 | ||
|
|
d387da142e | ||
|
|
e1c2757f70 | ||
|
|
f4e7e5fb90 | ||
|
|
d5b985f086 | ||
|
|
e706e59d49 | ||
|
|
fe98ba5a60 | ||
|
|
ddabc13796 | ||
|
|
7a839b461f | ||
|
|
764b3d4fda | ||
|
|
b4afc6ee2f | ||
|
|
5f16ceb294 | ||
|
|
3f932c2db1 | ||
|
|
f41b36bb9a | ||
|
|
038358b777 | ||
|
|
ed899ca9e8 | ||
|
|
e9196655dd | ||
|
|
821df709d3 | ||
|
|
67277abecf | ||
|
|
c2ff8de456 | ||
|
|
b059f194e4 | ||
|
|
7785869ccc | ||
|
|
5af777469a | ||
|
|
2149733bd2 | ||
|
|
dd20784d06 | ||
|
|
de6970e828 | ||
|
|
4a415620d3 | ||
|
|
acbcad1ece | ||
|
|
f4c4ab811b | ||
|
|
10601bc652 | ||
|
|
f2c004d1ae | ||
|
|
efc730863b | ||
|
|
d6967319b6 | ||
|
|
f5f59896ec | ||
|
|
147c35ebd4 | ||
|
|
7c0d6a8b88 | ||
|
|
ed00eb3f33 | ||
|
|
7615a3ab8d | ||
|
|
7be9bedaf9 | ||
|
|
00b1659dde | ||
|
|
528e25bdde | ||
|
|
b3849a90fd | ||
|
|
7d89fafe1a | ||
|
|
cd96248480 | ||
|
|
7554be172d | ||
|
|
4beab7ad39 | ||
|
|
41d23f84ed | ||
|
|
184670fb9b | ||
|
|
52791fd1c0 | ||
|
|
576da0fe46 | ||
|
|
215967437d | ||
|
|
d1ad3adcbe | ||
|
|
42960feff4 | ||
|
|
07246bc31c | ||
|
|
e646674b23 | ||
|
|
4628deecd1 | ||
|
|
eead3ee8ec | ||
|
|
c402265e88 | ||
|
|
ff495a74f6 | ||
|
|
45962fb8c2 | ||
|
|
fd6c690276 | ||
|
|
e730788477 | ||
|
|
ef7e2af8f5 | ||
|
|
15aa6142ef | ||
|
|
5492edcc6c | ||
|
|
e969ef2639 | ||
|
|
c098988a18 | ||
|
|
1bdfa29ef7 | ||
|
|
8adba82c02 | ||
|
|
8d9eb5f808 | ||
|
|
582c74cd93 | ||
|
|
f3d33e23c9 | ||
|
|
455bf50a91 | ||
|
|
2791008e19 | ||
|
|
a499de45cc | ||
|
|
23c9e6b727 | ||
|
|
9d32fb1d9e | ||
|
|
d4b6d22987 | ||
|
|
0be5b09fb4 | ||
|
|
81746d14b9 | ||
|
|
807c2b076c | ||
|
|
84fd8af6d3 | ||
|
|
9043a509a3 | ||
|
|
1ad3de5c54 | ||
|
|
d60908bba4 | ||
|
|
716754fae6 | ||
|
|
bb61a4769b | ||
|
|
ac45082216 | ||
|
|
e5202a4eae | ||
|
|
68e4f40a72 | ||
|
|
ada2ae69ec | ||
|
|
bc8381613d | ||
|
|
8e44fba76d | ||
|
|
7dbe335426 | ||
|
|
3f85c06b65 | ||
|
|
d20c2156e4 | ||
|
|
ad730d8a17 | ||
|
|
dbbdfbe7ee | ||
|
|
639b26b40c | ||
|
|
8f16388428 | ||
|
|
aaa497ff0b | ||
|
|
ef94333808 | ||
|
|
c25b0c2cd5 | ||
|
|
5d0c37bec0 | ||
|
|
bba1442649 | ||
|
|
a9ffd233df | ||
|
|
a034f02fb2 | ||
|
|
e6eee2bebf | ||
|
|
509d12643b | ||
|
|
5e71fab8a6 | ||
|
|
d01f3c1943 | ||
|
|
3f498cf2dc | ||
|
|
8c8c14c127 | ||
|
|
44a86e1be3 | ||
|
|
f0c678c41b | ||
|
|
e255c066cc | ||
|
|
e7959094f6 | ||
|
|
922d9aadf2 | ||
|
|
68716488db | ||
|
|
67a64c142d | ||
|
|
328b52e5ff | ||
|
|
700737c181 | ||
|
|
2f735f112d | ||
|
|
1ca0c8a29b | ||
|
|
d81d586b86 | ||
|
|
0f63da3698 | ||
|
|
62ed38c6f0 | ||
|
|
79c30cf4cb | ||
|
|
2f1e7298ce | ||
|
|
0da202023b | ||
|
|
48d0ec1363 | ||
|
|
a1a065a47e | ||
|
|
0516e3f330 | ||
|
|
5b81bdde39 | ||
|
|
865610a7c8 | ||
|
|
cb8c6908dc | ||
|
|
894dcb7c1c | ||
|
|
215eba0b82 | ||
|
|
edb1eca6f1 | ||
|
|
97b6f5d223 | ||
|
|
a090627059 | ||
|
|
53c87ba341 | ||
|
|
bb161497cf | ||
|
|
994fa2f3bf | ||
|
|
e151c5c644 | ||
|
|
3107c633e3 | ||
|
|
3e557c9861 | ||
|
|
54ef2d8112 | ||
|
|
b1f6843bd0 | ||
|
|
039c9d2441 | ||
|
|
2a45871823 | ||
|
|
461481fbdf | ||
|
|
4c8b49b193 | ||
|
|
e79de9774b | ||
|
|
34563916f7 | ||
|
|
9257eee982 | ||
|
|
6f05c4d351 | ||
|
|
2f612e0c67 | ||
|
|
61c611f5ad | ||
|
|
9224ede54f | ||
|
|
228d137936 | ||
|
|
e4303d3d21 | ||
|
|
ad8d3b387d | ||
|
|
62e76ca805 | ||
|
|
4f526cc816 | ||
|
|
dfb113f175 | ||
|
|
31ae5911a8 | ||
|
|
d3442b40b2 | ||
|
|
caa2952aa6 | ||
|
|
e00cfc854d | ||
|
|
b9c8f6bf34 | ||
|
|
ad6290953c | ||
|
|
efcbb51968 | ||
|
|
ed0df37ee7 | ||
|
|
004d2924e2 | ||
|
|
11be704109 | ||
|
|
5a4675c528 | ||
|
|
ecb1b2564a | ||
|
|
b35cb293f5 | ||
|
|
1c641037e8 | ||
|
|
6b5ad535ae | ||
|
|
8949d65ad1 | ||
|
|
3198fd31fa | ||
|
|
aa5d88055d | ||
|
|
df01836818 | ||
|
|
dfa156e6aa | ||
|
|
8c14ca93fa | ||
|
|
e4e1cd1de2 | ||
|
|
ef6ee72108 | ||
|
|
ed7580ad22 | ||
|
|
9eb71dda3d | ||
|
|
328814ee60 | ||
|
|
7398e5701b | ||
|
|
4e770e9120 | ||
|
|
b442a42d8e | ||
|
|
6d77bfae4f | ||
|
|
4081e2295e | ||
|
|
e1107fec10 | ||
|
|
25f80d320b | ||
|
|
cde18d1f43 | ||
|
|
457e61900d | ||
|
|
7e347972c4 | ||
|
|
19dd121968 | ||
|
|
829ec4f9cf | ||
|
|
55d83e777d | ||
|
|
1033dc7e2a | ||
|
|
619b0a25c9 | ||
|
|
666c795b98 | ||
|
|
a730b3f6a1 | ||
|
|
508ad46e0e | ||
|
|
e5b9f47623 | ||
|
|
ca74b80f10 | ||
|
|
cba820e390 | ||
|
|
6fe3c48a6e | ||
|
|
9c350bc20d | ||
|
|
256fd9a87e | ||
|
|
2d9b3ad5b3 | ||
|
|
b66c7c13ac | ||
|
|
3e1d7d8489 | ||
|
|
47c7ea5c60 | ||
|
|
4f737d1cbd | ||
|
|
742da690f4 | ||
|
|
99f54e44ff | ||
|
|
cb92113632 | ||
|
|
e7557e0252 | ||
|
|
e59b9916aa | ||
|
|
d0b694c5c8 | ||
|
|
eb45185eef | ||
|
|
32b9fb58b8 | ||
|
|
12b16077c4 | ||
|
|
a23806f486 | ||
|
|
6daa5f7500 | ||
|
|
703def4b2e | ||
|
|
de137aef98 | ||
|
|
acf828a759 | ||
|
|
8bb762124a | ||
|
|
ff6a0955eb | ||
|
|
8b133e40d5 | ||
|
|
44a54b8b3d | ||
|
|
d59cdbe90c | ||
|
|
0b2086b7a5 | ||
|
|
8f628cd805 | ||
|
|
91b3482894 | ||
|
|
e5500bfcf2 | ||
|
|
5d3db3ff7c | ||
|
|
4dd3de9286 | ||
|
|
8da3f773ae | ||
|
|
9d5f5b6878 | ||
|
|
9a2ba5b6d1 | ||
|
|
b277ba8121 | ||
|
|
84a37098ed | ||
|
|
56ccfa5218 | ||
|
|
7c2c8b2981 | ||
|
|
d5dddb0953 | ||
|
|
586c5be404 | ||
|
|
1cd01b5359 | ||
|
|
88538df267 | ||
|
|
63e5ee0d29 | ||
|
|
eba4e92994 | ||
|
|
82ecfa3b32 | ||
|
|
dc4e3f0e0b | ||
|
|
8f2e88234f |
5
.github/ISSUE_TEMPLATE/bug_report.md
vendored
5
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -9,9 +9,12 @@ assignees: ''
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
It would be great [upgrading](https://victoriametrics.github.io/#how-to-upgrade) to [the latest avaialble release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verifying whether the bug is reproducible there.
|
||||
It is also recommended reading [troubleshooting docs](https://victoriametrics.github.io/#troubleshooting).
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior
|
||||
Steps to reproduce the behavior.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
24
.github/workflows/main.yml
vendored
24
.github/workflows/main.yml
vendored
@@ -14,17 +14,15 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@master
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.14
|
||||
go-version: 1.15
|
||||
id: go
|
||||
- name: Dependencies
|
||||
env:
|
||||
GO111MODULE: on
|
||||
run: |
|
||||
go get -u golang.org/x/lint/golint
|
||||
go get -u github.com/kisielk/errcheck
|
||||
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.27.0
|
||||
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.29.0
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Build
|
||||
@@ -43,7 +41,23 @@ jobs:
|
||||
make victoria-metrics-arm64
|
||||
make vmutils
|
||||
GOOS=freebsd go build -mod=vendor ./app/victoria-metrics
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmagent
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmalert
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmbackup
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmrestore
|
||||
GOOS=freebsd go build -mod=vendor ./app/vmctl
|
||||
GOOS=openbsd go build -mod=vendor ./app/victoria-metrics
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmagent
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmalert
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmbackup
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmrestore
|
||||
GOOS=openbsd go build -mod=vendor ./app/vmctl
|
||||
GOOS=darwin go build -mod=vendor ./app/victoria-metrics
|
||||
GOOS=darwin go build -mod=vendor ./app/vmagent
|
||||
GOOS=darwin go build -mod=vendor ./app/vmalert
|
||||
GOOS=darwin go build -mod=vendor ./app/vmbackup
|
||||
GOOS=darwin go build -mod=vendor ./app/vmrestore
|
||||
GOOS=darwin go build -mod=vendor ./app/vmctl
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v1.0.6
|
||||
with:
|
||||
|
||||
120
CODE_OF_CONDUCT_RU.md
Normal file
120
CODE_OF_CONDUCT_RU.md
Normal file
@@ -0,0 +1,120 @@
|
||||
|
||||
# Кодекс Поведения участника
|
||||
|
||||
## Наши обязательства
|
||||
|
||||
Мы, как участники, авторы и лидеры обязуемся сделать участие в сообществе
|
||||
свободным от притеснений для всех, независимо от возраста, телосложения,
|
||||
видимых или невидимых ограничений способности, этнической принадлежности,
|
||||
половых признаков, гендерной идентичности и выражения, уровня опыта,
|
||||
образования, социо-экономического статуса, национальности, внешности,
|
||||
расы, религии, или сексуальной идентичности и ориентации.
|
||||
|
||||
Мы обещаем действовать и взаимодействовать таким образом, чтобы вносить вклад в открытое,
|
||||
дружелюбное, многообразное, инклюзивное и здоровое сообщество.
|
||||
|
||||
## Наши стандарты
|
||||
|
||||
Примеры поведения, создающие условия для благоприятных взаимоотношений включают в себя:
|
||||
|
||||
* Проявление доброты и эмпатии к другим участникам проекта
|
||||
* Уважение к чужой точке зрения и опыту
|
||||
* Конструктивная критика и принятие конструктивной критики
|
||||
* Принятие ответственности, принесение извинений тем, кто пострадал от наших ошибок
|
||||
и извлечение уроков из опыта
|
||||
* Ориентирование на то, что лучше подходит для сообщества, а не только для нас лично
|
||||
|
||||
Примеры неприемлемого поведения участников включают в себя:
|
||||
|
||||
* Использование выражений или изображений сексуального характера и нежелательное сексуальное внимание или домогательство в любой форме
|
||||
* Троллинг, оскорбительные или уничижительные комментарии, переход на личности или затрагивание политических убеждений
|
||||
* Публичное или приватное домогательство
|
||||
* Публикация личной информации других лиц, например, физического или электронного адреса, без явного разрешения
|
||||
* Иное поведение, которое обоснованно считать неуместным в профессиональной обстановке
|
||||
|
||||
## Обязанности
|
||||
|
||||
Лидеры сообщества отвечают за разъяснение и применение наших стандартов приемлемого
|
||||
поведения и будут предпринимать соответствующие и честные меры по исправлению положения
|
||||
в ответ на любое поведение, которое они сочтут неприемлемым, угрожающим, оскорбительным или вредным.
|
||||
|
||||
Лидеры сообщества обладают правом и обязанностью удалять, редактировать или отклонять
|
||||
комментарии, коммиты, код, изменения в вики, вопросы и другой вклад, который не совпадает
|
||||
с Кодексом Поведения, и предоставят причины принятого решения, когда сочтут нужным.
|
||||
|
||||
## Область применения
|
||||
|
||||
Данный Кодекс Поведения применим во всех во всех публичных физических и цифровых пространства сообщества,
|
||||
а также когда человек официально представляет сообщество в публичных местах.
|
||||
Примеры представления проекта или сообщества включают использование официальной электронной почты,
|
||||
публикации в официальном аккаунте в социальных сетях,
|
||||
или упоминания как представителя в онлайн или оффлайн мероприятии.
|
||||
|
||||
## Приведение в исполнение
|
||||
|
||||
О случаях домогательства, а так же оскорбительного или иного другого неприемлемого
|
||||
поведения можно сообщить ответственным лидерам сообщества с помощью письма на info@victoriametrics.com
|
||||
Все жалобы будут рассмотрены и расследованы оперативно и беспристрастно.
|
||||
|
||||
Все лидеры сообщества обязаны уважать неприкосновенность частной жизни и личную
|
||||
неприкосновенность автора сообщения.
|
||||
|
||||
## Руководство по исполнению
|
||||
|
||||
Лидеры сообщества будут следовать следующим Принципам Воздействия в Сообществе,
|
||||
чтобы определить последствия для тех, кого они считают виновными в нарушении данного Кодекса Поведения:
|
||||
|
||||
### 1. Исправление
|
||||
|
||||
**Общественное влияние**: Использование недопустимой лексики или другое поведение,
|
||||
считающиеся непрофессиональным или нежелательным в сообществе.
|
||||
|
||||
**Последствия**: Личное, письменное предупреждение от лидеров сообщества,
|
||||
объясняющее суть нарушения и почему такое поведение
|
||||
было неуместно. Лидеры сообщества могут попросить принести публичное извинение.
|
||||
|
||||
### 2. Предупреждение
|
||||
|
||||
**Общественное влияние**: Нарушение в результате одного инцидента или серии действий.
|
||||
|
||||
**Последствия**: Предупреждение о последствиях в случае продолжающегося неуместного поведения.
|
||||
На определенное время не допускается взаимодействие с людьми, вовлеченными в инцидент,
|
||||
включая незапрошенное взаимодействие
|
||||
с теми, кто обеспечивает соблюдение Кодекса. Это включает в себя избегание взаимодействия
|
||||
в публичных пространствах, а так же во внешних каналах,
|
||||
таких как социальные сети. Нарушение этих правил влечет за собой временный или вечный бан.
|
||||
|
||||
### 3. Временный бан
|
||||
|
||||
**Общественное влияние**: Серьёзное нарушение стандартов сообщества,
|
||||
включая продолжительное неуместное поведение.
|
||||
|
||||
**Последствия**: Временный запрет (бан) на любое взаимодействие
|
||||
или публичное общение с сообществом на определенный период времени.
|
||||
На этот период не допускается публичное или личное взаимодействие с людьми,
|
||||
вовлеченными в инцидент, включая незапрошенное взаимодействие
|
||||
с теми, кто обеспечивает соблюдение Кодекса.
|
||||
Нарушение этих правил влечет за собой вечный бан.
|
||||
|
||||
### 4. Вечный бан
|
||||
|
||||
**Общественное влияние**: Демонстрация систематических нарушений стандартов сообщества,
|
||||
включая продолжающееся неуместное поведение, домогательство до отдельных лиц,
|
||||
или проявление агрессии либо пренебрежительного отношения к категориям лиц.
|
||||
|
||||
**Последствия**: Вечный запрет на любое публичное взаимодействие с сообществом.
|
||||
|
||||
## Атрибуция
|
||||
|
||||
Данный Кодекс Поведения основан на [Кодекс Поведения участника][homepage],
|
||||
версии 2.0, доступной по адресу
|
||||
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
||||
|
||||
Принципы Воздействия в Сообществе были вдохновлены [Mozilla's code of conduct
|
||||
enforcement ladder](https://github.com/mozilla/diversity).
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
Ответы на общие вопросы о данном кодексе поведения ищите на странице FAQ:
|
||||
https://www.contributor-covenant.org/faq. Переводы доступны по адресу
|
||||
https://www.contributor-covenant.org/translations.
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2020 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2021 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
102
Makefile
102
Makefile
@@ -10,13 +10,16 @@ endif
|
||||
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
all: \
|
||||
victoria-metrics-prod \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
vmrestore-prod \
|
||||
vmctl-prod
|
||||
|
||||
include app/*/Makefile
|
||||
include deployment/*/Makefile
|
||||
@@ -30,7 +33,8 @@ publish: \
|
||||
publish-vmalert \
|
||||
publish-vmauth \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore
|
||||
publish-vmrestore \
|
||||
publish-vmctl
|
||||
|
||||
package: \
|
||||
package-victoria-metrics \
|
||||
@@ -38,31 +42,84 @@ package: \
|
||||
package-vmalert \
|
||||
package-vmauth \
|
||||
package-vmbackup \
|
||||
package-vmrestore
|
||||
package-vmrestore \
|
||||
package-vmctl
|
||||
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmauth \
|
||||
vmbackup \
|
||||
vmrestore
|
||||
vmrestore \
|
||||
vmctl
|
||||
|
||||
vmutils-arm64: \
|
||||
vmagent-arm64 \
|
||||
vmalert-arm64 \
|
||||
vmauth-arm64 \
|
||||
vmbackup-arm64 \
|
||||
vmrestore-arm64 \
|
||||
vmctl-arm64
|
||||
|
||||
release-snap:
|
||||
snapcraft
|
||||
snapcraft upload "victoriametrics_$(PKG_TAG)_multi.snap" --release beta,edge,candidate
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: victoria-metrics-prod
|
||||
cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz victoria-metrics-prod && \
|
||||
sha256sum victoria-metrics-$(PKG_TAG).tar.gz > victoria-metrics-$(PKG_TAG)_checksums.txt
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-amd64 \
|
||||
release-victoria-metrics-arm64
|
||||
|
||||
release-victoria-metrics-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-victoria-metrics-generic
|
||||
|
||||
release-victoria-metrics-arm64:
|
||||
GOARCH=arm64 $(MAKE) release-victoria-metrics-generic
|
||||
|
||||
release-victoria-metrics-generic: victoria-metrics-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOARCH)||" -czf victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
victoria-metrics-$(GOARCH)-prod \
|
||||
&& sha256sum victoria-metrics-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
victoria-metrics-$(GOARCH)-prod \
|
||||
| sed s/-$(GOARCH)// > victoria-metrics-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
|
||||
release-vmutils: \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmauth-prod vmbackup-prod vmrestore-prod && \
|
||||
sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
|
||||
release-vmutils-amd64 \
|
||||
release-vmutils-arm64
|
||||
|
||||
release-vmutils-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-vmutils-generic
|
||||
|
||||
release-vmutils-arm64:
|
||||
GOARCH=arm64 $(MAKE) release-vmutils-generic
|
||||
|
||||
release-vmutils-generic: \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOARCH)||" -czf vmutils-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod \
|
||||
&& sha256sum vmutils-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOARCH)-prod \
|
||||
vmalert-$(GOARCH)-prod \
|
||||
vmauth-$(GOARCH)-prod \
|
||||
vmbackup-$(GOARCH)-prod \
|
||||
vmrestore-$(GOARCH)-prod \
|
||||
vmctl-$(GOARCH)-prod \
|
||||
| sed s/-$(GOARCH)// > vmutils-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
@@ -80,7 +137,7 @@ lint: install-golint
|
||||
golint app/...
|
||||
|
||||
install-golint:
|
||||
which golint || GO111MODULE=off go get -u golang.org/x/lint/golint
|
||||
which golint || go install golang.org/x/lint/golint
|
||||
|
||||
errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./lib/...
|
||||
@@ -92,9 +149,10 @@ errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmauth/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmctl/...
|
||||
|
||||
install-errcheck:
|
||||
which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck
|
||||
which errcheck || go install github.com/kisielk/errcheck
|
||||
|
||||
check-all: fmt vet lint errcheck golangci-lint
|
||||
|
||||
@@ -122,8 +180,8 @@ benchmark-pure:
|
||||
GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./app/...
|
||||
|
||||
vendor-update:
|
||||
GO111MODULE=on go get -u ./lib/...
|
||||
GO111MODULE=on go get -u ./app/...
|
||||
GO111MODULE=on go get -u -d ./lib/...
|
||||
GO111MODULE=on go get -u -d ./app/...
|
||||
GO111MODULE=on go mod tidy
|
||||
GO111MODULE=on go mod vendor
|
||||
|
||||
@@ -133,18 +191,21 @@ app-local:
|
||||
app-local-pure:
|
||||
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-with-goarch:
|
||||
GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc
|
||||
|
||||
install-qtc:
|
||||
which qtc || GO111MODULE=off go get -u github.com/valyala/quicktemplate/qtc
|
||||
which qtc || go install github.com/valyala/quicktemplate/qtc
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.29.0
|
||||
|
||||
docs-sync:
|
||||
cp app/vmagent/README.md docs/vmagent.md
|
||||
@@ -152,4 +213,5 @@ docs-sync:
|
||||
cp app/vmauth/README.md docs/vmauth.md
|
||||
cp app/vmbackup/README.md docs/vmbackup.md
|
||||
cp app/vmrestore/README.md docs/vmrestore.md
|
||||
cp app/vmctl/README.md docs/vmctl.md
|
||||
cp README.md docs/Single-server-VictoriaMetrics.md
|
||||
|
||||
@@ -108,3 +108,10 @@ victoria-metrics-package-deb-rpm-all: \
|
||||
victoria-metrics-package-deb-arm64 \
|
||||
victoria-metrics-package-rpm \
|
||||
victoria-metrics-package-rpm-arm64
|
||||
|
||||
### Packaging as snap
|
||||
victoria-metrics-package-snap:
|
||||
which snapcraft || snap install snapcraft
|
||||
which multipass || snap install multipass
|
||||
snapcraft
|
||||
|
||||
|
||||
@@ -2,19 +2,25 @@ package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
@@ -23,18 +29,33 @@ var (
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Remove superflouos samples from time series if they are located closer to each other than this duration. "+
|
||||
"This may be useful for reducing overhead when multiple identically configured Prometheus instances write data to the same VictoriaMetrics. "+
|
||||
"Deduplication is disabled if the -dedup.minScrapeInterval is 0")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only -promscrape.config and then exit. "+
|
||||
"Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse")
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
*dryRun = true
|
||||
}
|
||||
if *dryRun {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval)
|
||||
vmstorage.Init()
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
startSelfScraper()
|
||||
@@ -64,6 +85,18 @@ func main() {
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics.</h2></br>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://victoriametrics.github.io/'>https://victoriametrics.github.io/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints: </br>")
|
||||
writeAPIHelp(w, [][]string{
|
||||
{"/targets", "discovered targets list"},
|
||||
{"/api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"/metrics", "available service metrics"},
|
||||
{"/api/v1/status/tsdb", "tsdb status page"},
|
||||
})
|
||||
return true
|
||||
}
|
||||
if vminsert.RequestHandler(w, r) {
|
||||
return true
|
||||
}
|
||||
@@ -75,3 +108,21 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func writeAPIHelp(w io.Writer, pathList [][]string) {
|
||||
pathPrefix := httpserver.GetPathPrefix()
|
||||
for _, p := range pathList {
|
||||
p, doc := p[0], p[1]
|
||||
p = path.Join(pathPrefix, p)
|
||||
fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
|
||||
}
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
victoria-metrics is a time series database and monitoring solution.
|
||||
|
||||
See the docs at https://victoriametrics.github.io/
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
testutil "github.com/VictoriaMetrics/VictoriaMetrics/app/victoria-metrics/test"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
@@ -57,6 +58,7 @@ var (
|
||||
type test struct {
|
||||
Name string `json:"name"`
|
||||
Data []string `json:"data"`
|
||||
InsertQuery string `json:"insert_query"`
|
||||
Query []string `json:"query"`
|
||||
ResultMetrics []Metric `json:"result_metrics"`
|
||||
ResultSeries Series `json:"result_series"`
|
||||
@@ -129,7 +131,7 @@ func setUp() {
|
||||
storagePath = filepath.Join(os.TempDir(), testStorageSuffix)
|
||||
processFlags()
|
||||
logger.Init()
|
||||
vmstorage.InitWithoutMetrics()
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
@@ -192,7 +194,7 @@ func TestWriteRead(t *testing.T) {
|
||||
time.Sleep(1 * time.Second)
|
||||
vmstorage.Stop()
|
||||
// open storage after stop in write
|
||||
vmstorage.InitWithoutMetrics()
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
t.Run("read", testRead)
|
||||
}
|
||||
|
||||
@@ -208,7 +210,7 @@ func testWrite(t *testing.T) {
|
||||
t.Errorf("error compressing %v %s", r, err)
|
||||
t.Fail()
|
||||
}
|
||||
httpWrite(t, testPromWriteHTTPPath, bytes.NewBuffer(data))
|
||||
httpWrite(t, testPromWriteHTTPPath, test.InsertQuery, bytes.NewBuffer(data))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -217,7 +219,7 @@ func testWrite(t *testing.T) {
|
||||
test := x
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
httpWrite(t, testWriteHTTPPath, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
httpWrite(t, testWriteHTTPPath, test.InsertQuery, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
})
|
||||
}
|
||||
})
|
||||
@@ -245,7 +247,7 @@ func testWrite(t *testing.T) {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
logger.Infof("writing %s", test.Data)
|
||||
httpWrite(t, testOpenTSDBWriteHTTPPath, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
httpWrite(t, testOpenTSDBWriteHTTPPath, test.InsertQuery, bytes.NewBufferString(strings.Join(test.Data, "\n")))
|
||||
})
|
||||
}
|
||||
})
|
||||
@@ -323,10 +325,10 @@ func readIn(readFor string, t *testing.T, insertTime time.Time) []test {
|
||||
return tt
|
||||
}
|
||||
|
||||
func httpWrite(t *testing.T, address string, r io.Reader) {
|
||||
func httpWrite(t *testing.T, address, query string, r io.Reader) {
|
||||
t.Helper()
|
||||
s := newSuite(t)
|
||||
resp, err := http.Post(address, "", r)
|
||||
resp, err := http.Post(address+query, "", r)
|
||||
s.noError(err)
|
||||
s.noError(resp.Body.Close())
|
||||
s.equalInt(resp.StatusCode, 204)
|
||||
@@ -373,7 +375,7 @@ func checkMetricsResult(got, want []Metric) error {
|
||||
want = removeIfFoundMetrics(r, want)
|
||||
}
|
||||
if len(want) > 0 {
|
||||
return fmt.Errorf("exptected metrics %+v not found in %+v", want, got)
|
||||
return fmt.Errorf("expected metrics %+v not found in %+v", want, got)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -85,7 +85,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
mr.Timestamp = currentTimestamp
|
||||
mr.Value = r.Value
|
||||
}
|
||||
logger.Infof("writing %d rows at timestamp %d", len(mrs), currentTimestamp)
|
||||
vmstorage.AddRows(mrs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,6 @@
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"item":"y"},"values":[["{TIME_S-1m}","0.5"],["{TIME_S}","0.5"]]}
|
||||
{"metric":{"item":"y"},"values":[["{TIME_S-1m}","0.5"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
"empty_label_match 1 {TIME_S-1m}",
|
||||
"empty_label_match;foo=bar 2 {TIME_S-1m}",
|
||||
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S}&end={TIME_S}&step=60"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S-1m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"__name__":"empty_label_match"},"values":[["{TIME_S}","1"]]},
|
||||
{"metric":{"__name__":"empty_label_match","foo":"bar"},"values":[["{TIME_S}","2"]]}
|
||||
{"metric":{"__name__":"empty_label_match"},"values":[["{TIME_S-1m}","1"],["{TIME_S}","1"]]},
|
||||
{"metric":{"__name__":"empty_label_match","foo":"bar"},"values":[["{TIME_S-1m}","2"],["{TIME_S}","2"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
["{TIME_S-120s}","3"],
|
||||
["{TIME_S-60s}","2"],
|
||||
["{TIME_S-30s}","1"],
|
||||
["{TIME_S-20s}","1"]
|
||||
["{TIME_S-20s}","1"],
|
||||
["{TIME_S-10s}","1"],
|
||||
["{TIME_S-0s}","1"]
|
||||
]}]}}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,6 @@
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"x"},"values":[["{TIME_S-2m}","2"]]},
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"y"},"values":[["{TIME_S-2m}","4"],["{TIME_S-1m}","3"],["{TIME_S}","3"]]}
|
||||
{"metric":{"__name__":"not_nan_as_missing_data","item":"y"},"values":[["{TIME_S-2m}","4"],["{TIME_S-1m}","3"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,6 @@
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","1"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","3"]}]}
|
||||
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","2"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","4"]}]}
|
||||
}
|
||||
}
|
||||
|
||||
10
app/victoria-metrics/testdata/influxdb/with_extra_labels.json
vendored
Normal file
10
app/victoria-metrics/testdata/influxdb/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"name": "insert_with_extra_labels",
|
||||
"data": ["measurement,tag1=value1,tag2=value2 field6=1.23,field5=123 {TIME_NS}"],
|
||||
"insert_query": "?extra_label=job=test&extra_label=tag2=value10",
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"measurement_field5","tag1":"value1","job": "test","tag2":"value10"},"values":[123], "timestamps": ["{TIME_MS}"]},
|
||||
{"metric":{"__name__":"measurement_field6","tag1":"value1","job": "test","tag2":"value10"},"values":[1.23], "timestamps": ["{TIME_MS}"]}
|
||||
]
|
||||
}
|
||||
9
app/victoria-metrics/testdata/opentsdbhttp/with_extra_labels.json
vendored
Normal file
9
app/victoria-metrics/testdata/opentsdbhttp/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "insert_with_extra_labels",
|
||||
"data": ["{\"metric\": \"opentsdbhttp.foobar\", \"value\": 1001, \"timestamp\": {TIME_S}, \"tags\": {\"bar\":\"baz\", \"x\": \"y\"}}"],
|
||||
"insert_query": "?extra_label=job=open-test&extra_label=x=z",
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"opentsdbhttp.foobar","bar":"baz","x":"z","job": "open-test"},"values":[1001], "timestamps": ["{TIME_MSZ}"]}
|
||||
]
|
||||
}
|
||||
9
app/victoria-metrics/testdata/prometheus/with_extra_labels.json
vendored
Normal file
9
app/victoria-metrics/testdata/prometheus/with_extra_labels.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "basic_insertion_with_extra_labels",
|
||||
"insert_query": "?extra_label=job=prom-test&extra_label=baz=bar",
|
||||
"data": ["[{\"labels\":[{\"name\":\"__name__\",\"value\":\"prometheus.foobar\"},{\"name\":\"baz\",\"value\":\"qux\"}],\"samples\":[{\"value\":100000,\"timestamp\":\"{TIME_MS}\"}]}]"],
|
||||
"query": ["/api/v1/export?match={__name__!=''}"],
|
||||
"result_metrics": [
|
||||
{"metric":{"__name__":"prometheus.foobar","baz":"bar","job": "prom-test"},"values":[100000], "timestamps": ["{TIME_MS}"]}
|
||||
]
|
||||
}
|
||||
@@ -59,19 +59,22 @@ run-vmagent:
|
||||
$(MAKE) run-via-docker
|
||||
|
||||
vmagent-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-amd64 ./app/vmagent
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm64 ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-ppc64le ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-386 ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-local-with-goarch:
|
||||
APP_NAME=vmagent $(MAKE) app-local-with-goarch
|
||||
|
||||
vmagent-pure:
|
||||
APP_NAME=vmagent $(MAKE) app-local-pure
|
||||
|
||||
@@ -7,7 +7,7 @@ or any other Prometheus-compatible storage system that supports the `remote_writ
|
||||
<img alt="vmagent" src="vmagent.png">
|
||||
|
||||
|
||||
### Motivation
|
||||
## Motivation
|
||||
|
||||
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
|
||||
@@ -15,18 +15,20 @@ Also, we found that users’ infrastructure are snowflakes - no two are alike, a
|
||||
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
|
||||
|
||||
|
||||
### Features
|
||||
## Features
|
||||
|
||||
* Can be used as drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
|
||||
See [Quick Start](#quick-start) for details.
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Accepts data via all the ingestion protocols supported by VictoriaMetrics:
|
||||
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
|
||||
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
|
||||
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-opentsdb-compatible-agents).
|
||||
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
|
||||
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
|
||||
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-opentsdb-compatible-agents).
|
||||
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
|
||||
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data).
|
||||
* Arbitrary CSV data via `http://<vmagent>:8429/api/v1/import/csv`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-csv-data).
|
||||
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-data-in-json-line-format).
|
||||
* Native data import protocol via `http://<vmagent>:8429/api/v1/import/native`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-data-in-native-format).
|
||||
* Data in Prometheus exposition format. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
* Arbitrary CSV data via `http://<vmagent>:8429/api/v1/import/csv`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-import-csv-data).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems.
|
||||
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
|
||||
@@ -34,13 +36,13 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared to Prometheus.
|
||||
|
||||
|
||||
### Quick Start
|
||||
## Quick Start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
|
||||
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary amount of remote storage systems.
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary number of remote storage systems.
|
||||
|
||||
Example command line:
|
||||
|
||||
@@ -54,17 +56,33 @@ If you only need to collect Influx data, then the following is sufficient:
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
Then send Influx data to `http://vmagent-host:8429`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
|
||||
Then send Influx data to `http://vmagent-host:8429`. See [these docs](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
|
||||
|
||||
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags).
|
||||
|
||||
Pass `-help` to `vmagent` in order to see the full list of supported command-line flags with their descriptions.
|
||||
|
||||
|
||||
### Use cases
|
||||
## Configuration update
|
||||
|
||||
`vmagent` should be restarted in order to update config options set via command-line args.
|
||||
|
||||
`vmagent` supports multiple approaches for reloading configs from updated config files such as `-promscrape.config`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`:
|
||||
|
||||
* Sending `SUGHUP` signal to `vmagent` process:
|
||||
```bash
|
||||
kill -SIGHUP `pidof vmagent`
|
||||
```
|
||||
|
||||
* Sending HTTP request to `http://vmagent:8429/-/reload` endpoint.
|
||||
|
||||
There is also `-promscrape.configCheckInterval` command-line option, which can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
|
||||
|
||||
#### IoT and Edge monitoring
|
||||
## Use cases
|
||||
|
||||
|
||||
### IoT and Edge monitoring
|
||||
|
||||
`vmagent` can run and collect metrics in IoT and industrial networks with unreliable or scheduled connections to the remote storage.
|
||||
It buffers the collected data in local files until the connection to remote storage becomes available and then sends the buffered
|
||||
@@ -75,14 +93,14 @@ The maximum buffer size can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/Makefile) for details.
|
||||
|
||||
|
||||
#### Drop-in replacement for Prometheus
|
||||
### Drop-in replacement for Prometheus
|
||||
|
||||
If you use Prometheus only for scraping metrics from various targets and forwarding these metrics to remote storage,
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such a setup.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
|
||||
#### Replication and high availability
|
||||
### Replication and high availability
|
||||
|
||||
`vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instances.
|
||||
@@ -90,14 +108,14 @@ If a single remote storage instance temporarily is out of service, then the coll
|
||||
Then it sends the buffered data to the remote storage in order to prevent data gaps in the remote storage.
|
||||
|
||||
|
||||
#### Relabeling and filtering
|
||||
### Relabeling and filtering
|
||||
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to remote storage. Additionally,
|
||||
it can remove unwanted samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
See [these docs](#relabeling) for details.
|
||||
|
||||
|
||||
#### Splitting data streams among multiple systems
|
||||
### Splitting data streams among multiple systems
|
||||
|
||||
`vmagent` supports splitting the collected data between muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For instance, it is possible to replicate or split
|
||||
@@ -105,7 +123,7 @@ data among long-term remote storage, short-term remote storage and real-time ana
|
||||
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
|
||||
|
||||
#### Prometheus remote_write proxy
|
||||
### Prometheus remote_write proxy
|
||||
|
||||
`vmagent` may be used as a proxy for Prometheus data sent via Prometheus `remote_write` protocol. It can accept data via `remote_write` API
|
||||
at `/api/v1/write` endpoint, apply relabeling and filtering and then proxy it to another `remote_write` systems.
|
||||
@@ -113,8 +131,12 @@ The `vmagent` can be configured to encrypt the incoming `remote_write` requests
|
||||
Additionally, Basic Auth can be enabled for the incoming `remote_write` requests with `-httpAuth.*` command-line flags.
|
||||
|
||||
|
||||
### remote_write for clustered version
|
||||
|
||||
### How to collect metrics in Prometheus format
|
||||
Despite `vmagent` can accept data in several supported protocols (OpenTSDB, Influx, Prometheus, Graphite) and scrape data from various targets, writes always peformed in Promethes remote_write protocol. Therefore for clustered version `-remoteWrite.url` command-line flag should be configured as `<schema>://<vminsert-host>:8480/insert/<customer-id>/prometheus/api/v1/write`
|
||||
|
||||
|
||||
## How to collect metrics in Prometheus format
|
||||
|
||||
Pass the path to `prometheus.yml` to `-promscrape.config` command-line flag. `vmagent` takes into account the following
|
||||
sections from [Prometheus config file](https://prometheus.io/docs/prometheus/latest/configuration/configuration/):
|
||||
@@ -134,7 +156,7 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
|
||||
* `ec2_sd_configs` - for scraping targets in Amazon EC2.
|
||||
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
|
||||
`vmagent` doesn't support `role_arn` config param yet.
|
||||
`vmagent` doesn't support `profile` config param and aws credentials file yet.
|
||||
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
|
||||
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
|
||||
`vmagent` provides the following additional functionality for `gce_sd_config`:
|
||||
@@ -146,16 +168,31 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details.
|
||||
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
|
||||
* `openstack_sd_configs` - for scraping OpenStack targets.
|
||||
See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details.
|
||||
[OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only.
|
||||
* `dockerswarm_sd_configs` - for scraping Docker Swarm targets.
|
||||
See [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) for details.
|
||||
* `eureka_sd_configs` - for scraping targets registered in [Netflix Eureka](https://github.com/Netflix/eureka).
|
||||
See [eureka_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config) for details.
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
|
||||
`vmagent` also support the following additional options in `scrape_config` section:
|
||||
|
||||
* `disable_compression: true` - for disabling response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets
|
||||
in order to save network bandwidth.
|
||||
* `disable_keepalive: true` - for disabling [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis.
|
||||
By default `vmagent` uses keep-alive connections to scrape targets in order to reduce overhead on connection re-establishing.
|
||||
|
||||
Note that `vmagent` doesn't support `refresh_interval` option these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.
|
||||
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
|
||||
|
||||
### Adding labels to metrics
|
||||
## Adding labels to metrics
|
||||
|
||||
Labels can be added to metrics via the following mechanisms:
|
||||
|
||||
@@ -163,7 +200,7 @@ Labels can be added to metrics via the following mechanisms:
|
||||
* Via `-remoteWrite.label` command-line flag. These labels are added to all the collected metrics before sending them to `-remoteWrite.url`.
|
||||
|
||||
|
||||
### Relabeling
|
||||
## Relabeling
|
||||
|
||||
`vmagent` supports [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
|
||||
Additionally it provides the following extra actions:
|
||||
@@ -182,6 +219,7 @@ The relabeling can be defined in the following places:
|
||||
|
||||
Read more about relabeling in the following articles:
|
||||
|
||||
* [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
|
||||
* [Life of a label](https://www.robustperception.io/life-of-a-label)
|
||||
* [Discarding targets and timeseries with relabeling](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts)
|
||||
* [Dropping labels at scrape time](https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus)
|
||||
@@ -189,70 +227,149 @@ Read more about relabeling in the following articles:
|
||||
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
|
||||
|
||||
|
||||
### Monitoring
|
||||
## Monitoring
|
||||
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
|
||||
If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard.
|
||||
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
|
||||
`vmagent` also exports target statuses at the following handlers:
|
||||
|
||||
* `http://vmagent-host:8429/targets`. This handler returns human-readable plaintext status for every active target.
|
||||
This page is convenient to query from command line with `wget`, `curl` or similar tools.
|
||||
It accepts optional `show_original_labels=1` query arg, which shows the original labels per each target before applying relabeling.
|
||||
This information may be useful for debugging target relabeling.
|
||||
* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets).
|
||||
|
||||
* `http://vmagent-host:8429/ready`. This handler returns http 200 status code when `vmagent` finishes initialization for all service_discovery configs.
|
||||
It may be useful for performing `vmagent` rolling update without scrape loss.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
## Troubleshooting
|
||||
|
||||
* It is recommended [setting up the official Grafana dashboard](#monitoring) in order to monitor `vmagent` state.
|
||||
|
||||
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
|
||||
since `vmagent` establishes at least a single TCP connection per each target.
|
||||
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`
|
||||
and `http://vmagent-host:8429/api/v1/targets`.
|
||||
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
* The `/api/v1/targets` page could be useful for debugging relabeling process for scrape targets.
|
||||
This page contains original labels for targets dropped during relabeling (see "droppedTargets" section in the page output). By default up to `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling, then increase `-promscrape.maxDroppedTargets` command-line flag value in order to see all the dropped targets. Note that tracking each dropped target requires up to 10Kb of RAM, so big values for `-promscrape.maxDroppedTargets` may result in increased memory usage if big number of scrape targets are dropped during relabeling.
|
||||
|
||||
* If `vmagent` scrapes big number of targets, then `-promscrape.dropOriginalLabels` command-line option may be passed to `vmagent` in order to reduce memory usage.
|
||||
This option drops `"discoveredLabels"` and `"droppedTargets"` lists at `/api/v1/targets` page, which may result in reduced debuggability for improperly configured per-target relabeling.
|
||||
|
||||
* If `vmagent` scrapes targets with millions of metrics per each target (for instance, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)),
|
||||
then it is recommended enabling `stream parsing mode` in order to reduce memory usage during scraping. This mode may be enabled either globally for all the scrape targets
|
||||
by passing `-promscrape.streamParse` command-line flag or on a per-scrape target basis with `stream_parse: true` option. For example:
|
||||
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: 'big-federate'
|
||||
stream_parse: true
|
||||
static_configs:
|
||||
- targets:
|
||||
- big-prometeus1
|
||||
- big-prometeus2
|
||||
honor_labels: true
|
||||
metrics_path: /federate
|
||||
params:
|
||||
'match[]': ['{__name__!=""}']
|
||||
```
|
||||
|
||||
Note that `sample_limit` option doesn't work if stream parsing is enabled, since the parsed data is pushed to remote storage as soon as it is parsed. So `sample_limit` option
|
||||
has no sense during stream parsing.
|
||||
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
|
||||
* If you see gaps on the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, then try increasing `-remoteWrite.queues`.
|
||||
Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage, so it starts dropping the buffered data
|
||||
if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
|
||||
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports.
|
||||
Just add the following relabeling rule to `relabel_configs` section in order to filter out targets with unneeded ports:
|
||||
* By default `vmagent` masks `-remoteWrite.url` with `secret-url` values in logs and at `/metrics` page because
|
||||
the url may contain sensitive information such as auth tokens or passwords.
|
||||
Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
|
||||
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
```
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports
|
||||
or they use init container. These errors can be either fixed or suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
|
||||
See available options below if you prefer fixing the root cause of the error:
|
||||
|
||||
The following `relabel_configs` section may help determining `__meta_*` labels resulting in duplicate targets:
|
||||
```yml
|
||||
- action: labelmap
|
||||
regex: __meta_(.*)
|
||||
```
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out pods with unneeded ports:
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
```
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out init container pods:
|
||||
```yml
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
```
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent` from the root folder of the repository.
|
||||
It builds `vmagent` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmagent-prod` from the root folder of the repository.
|
||||
It builds `vmagent-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmagent
|
||||
ROOT_IMAGE=scratch make package-vmagent
|
||||
```
|
||||
|
||||
### ARM build
|
||||
|
||||
### Profiling
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent-arm` or `make vmagent-arm64` from the root folder of the repository.
|
||||
It builds `vmagent-arm` or `vmagent-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmagent-arm-prod` or `make vmagent-arm64-prod` from the root folder of the repository.
|
||||
It builds `vmagent-arm-prod` or `vmagent-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
## Profiling
|
||||
|
||||
`vmagent` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
// PushCtx is a context used for populating WriteRequest.
|
||||
@@ -28,12 +29,7 @@ func (ctx *PushCtx) Reset() {
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = ctx.WriteRequest.Timeseries[:0]
|
||||
|
||||
labels := ctx.Labels
|
||||
for i := range labels {
|
||||
label := &labels[i]
|
||||
label.Name = ""
|
||||
label.Value = ""
|
||||
}
|
||||
promrelabel.CleanLabels(ctx.Labels)
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
|
||||
ctx.Samples = ctx.Samples[:0]
|
||||
@@ -67,4 +63,4 @@ func PutPushCtx(ctx *PushCtx) {
|
||||
}
|
||||
|
||||
var pushCtxPool sync.Pool
|
||||
var pushCtxPoolCh = make(chan *PushCtx, runtime.GOMAXPROCS(-1))
|
||||
var pushCtxPoolCh = make(chan *PushCtx, cgroup.AvailableCPUs())
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -18,12 +19,18 @@ var (
|
||||
|
||||
// InsertHandler processes csv data from req.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -44,6 +51,7 @@ func insertRows(rows []parser.Row) error {
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
|
||||
@@ -4,13 +4,15 @@ import (
|
||||
"flag"
|
||||
"io"
|
||||
"net/http"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -19,6 +21,7 @@ import (
|
||||
var (
|
||||
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
|
||||
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -31,7 +34,9 @@ var (
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, false, "", "", insertRows)
|
||||
return parser.ParseStream(r, false, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(db, rows, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -39,17 +44,23 @@ func InsertHandlerForReader(r io.Reader) error {
|
||||
//
|
||||
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
|
||||
func InsertHandlerForHTTP(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, insertRows)
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(db, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(db string, rows []parser.Row) error {
|
||||
func insertRows(db string, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := getPushCtx()
|
||||
defer putPushCtx(ctx)
|
||||
|
||||
@@ -61,25 +72,30 @@ func insertRows(db string, rows []parser.Row) error {
|
||||
buf := ctx.buf[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
rowsTotal += len(r.Fields)
|
||||
commonLabels = commonLabels[:0]
|
||||
hasDBLabel := false
|
||||
hasDBKey := false
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
if tag.Key == "db" {
|
||||
hasDBLabel = true
|
||||
hasDBKey = true
|
||||
}
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
if len(db) > 0 && !hasDBLabel {
|
||||
if len(db) > 0 && !hasDBKey {
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: "db",
|
||||
Value: db,
|
||||
})
|
||||
}
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
|
||||
commonLabels = append(commonLabels, extraLabels...)
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
if !*skipMeasurement {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, r.Measurement...)
|
||||
}
|
||||
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
|
||||
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
|
||||
@@ -107,7 +123,6 @@ func insertRows(db string, rows []parser.Row) error {
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
rowsTotal += len(r.Fields)
|
||||
}
|
||||
ctx.buf = buf
|
||||
ctx.ctx.WriteRequest.Timeseries = tssDst
|
||||
@@ -131,12 +146,8 @@ type pushCtx struct {
|
||||
func (ctx *pushCtx) reset() {
|
||||
ctx.ctx.Reset()
|
||||
|
||||
commonLabels := ctx.commonLabels
|
||||
for i := range commonLabels {
|
||||
label := &commonLabels[i]
|
||||
label.Name = ""
|
||||
label.Value = ""
|
||||
}
|
||||
promrelabel.CleanLabels(ctx.commonLabels)
|
||||
ctx.commonLabels = ctx.commonLabels[:0]
|
||||
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
ctx.buf = ctx.buf[:0]
|
||||
@@ -164,4 +175,4 @@ func putPushCtx(ctx *pushCtx) {
|
||||
}
|
||||
|
||||
var pushCtxPool sync.Pool
|
||||
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
|
||||
var pushCtxPoolCh = make(chan *pushCtx, cgroup.AvailableCPUs())
|
||||
|
||||
@@ -6,18 +6,22 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/prometheusimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
|
||||
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
|
||||
@@ -26,6 +30,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -34,14 +39,16 @@ var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to `http://<vmagent>:8429/write`")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . See also -promscrape.config.dryRun")
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
|
||||
"Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -56,18 +63,23 @@ func main() {
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
remotewrite.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
if *dryRun {
|
||||
if err := flag.Set("promscrape.config.strictParse", "true"); err != nil {
|
||||
logger.Panicf("BUG: cannot set promscrape.config.strictParse=true: %s", err)
|
||||
if promscrape.IsDryRun() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
|
||||
return
|
||||
}
|
||||
if *dryRun {
|
||||
if err := remotewrite.CheckRelabelConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking relabel configs: %s", err)
|
||||
}
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking Prometheus config: %s", err)
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("all the configs are ok; exitting with 0 status code")
|
||||
return
|
||||
@@ -76,6 +88,7 @@ func main() {
|
||||
logger.Infof("starting vmagent at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
remotewrite.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
writeconcurrencylimiter.Init()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, influx.InsertHandlerForReader)
|
||||
@@ -123,19 +136,24 @@ func main() {
|
||||
if len(*opentsdbHTTPListenAddr) > 0 {
|
||||
opentsdbhttpServer.MustStop()
|
||||
}
|
||||
common.StopUnmarshalWorkers()
|
||||
remotewrite.Stop()
|
||||
|
||||
logger.Infof("successfully stopped vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
fmt.Fprintf(w, "vmagent - see docs at https://victoriametrics.github.io/vmagent.html")
|
||||
return true
|
||||
}
|
||||
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
||||
switch path {
|
||||
case "/api/v1/write":
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -144,7 +162,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
vmimportRequests.Inc()
|
||||
if err := vmimport.InsertHandler(r); err != nil {
|
||||
vmimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -153,7 +171,25 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
csvimportRequests.Inc()
|
||||
if err := csvimport.InsertHandler(r); err != nil {
|
||||
csvimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/api/v1/import/prometheus":
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/api/v1/import/native":
|
||||
nativeimportRequests.Inc()
|
||||
if err := native.InsertHandler(r); err != nil {
|
||||
nativeimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -162,7 +198,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
influxWriteRequests.Inc()
|
||||
if err := influx.InsertHandlerForHTTP(r); err != nil {
|
||||
influxWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -175,14 +211,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/targets":
|
||||
promscrapeTargetsRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
promscrape.WriteHumanReadableTargetsStatus(w)
|
||||
promscrape.WriteHumanReadableTargetsStatus(w, r)
|
||||
return true
|
||||
case "/api/v1/targets":
|
||||
promscrapeAPIV1TargetsRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
state := r.FormValue("state")
|
||||
promscrape.WriteAPIV1Targets(w, state)
|
||||
return true
|
||||
case "/-/reload":
|
||||
promscrapeConfigReloadRequests.Inc()
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/ready":
|
||||
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
|
||||
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
|
||||
http.Error(w, errMsg, http.StatusTooEarly)
|
||||
} else {
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("OK"))
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -197,12 +248,19 @@ var (
|
||||
csvimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
csvimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
|
||||
prometheusimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
prometheusimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
|
||||
nativeimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/native", protocol="nativeimport"}`)
|
||||
nativeimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/native", protocol="nativeimport"}`)
|
||||
|
||||
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/write", protocol="influx"}`)
|
||||
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/write", protocol="influx"}`)
|
||||
|
||||
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/query", protocol="influx"}`)
|
||||
|
||||
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
|
||||
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
|
||||
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
|
||||
|
||||
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
||||
)
|
||||
@@ -211,10 +269,7 @@ func usage() {
|
||||
const s = `
|
||||
vmagent collects metrics data via popular data ingestion protocols and routes it to VictoriaMetrics.
|
||||
|
||||
See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md .
|
||||
See the docs at https://victoriametrics.github.io/vmagent.html .
|
||||
`
|
||||
|
||||
f := flag.CommandLine.Output()
|
||||
fmt.Fprintf(f, "%s\n", s)
|
||||
flag.PrintDefaults()
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
85
app/vmagent/native/request_handler.go
Normal file
85
app/vmagent/native/request_handler.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package native
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="native"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="native"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes `/api/v1/import` request.
|
||||
//
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(block *parser.Block) error {
|
||||
return insertRows(block, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(block *parser.Block, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
// Update rowsInserted and rowsPerInsert before actual inserting,
|
||||
// since relabeling can prevent from inserting the rows.
|
||||
rowsLen := len(block.Values)
|
||||
rowsInserted.Add(rowsLen)
|
||||
rowsPerInsert.Update(float64(rowsLen))
|
||||
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
mn := &block.MetricName
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: bytesutil.ToUnsafeString(mn.MetricGroup),
|
||||
})
|
||||
for j := range mn.Tags {
|
||||
tag := &mn.Tags[j]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
values := block.Values
|
||||
timestamps := block.Timestamps
|
||||
if len(timestamps) != len(values) {
|
||||
logger.Panicf("BUG: len(timestamps)=%d must match len(values)=%d", len(timestamps), len(values))
|
||||
}
|
||||
samplesLen := len(samples)
|
||||
for j, value := range values {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: value,
|
||||
Timestamp: timestamps[j],
|
||||
})
|
||||
}
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(&ctx.WriteRequest)
|
||||
return nil
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -19,12 +20,18 @@ var (
|
||||
// InsertHandler processes HTTP OpenTSDB put requests.
|
||||
// See http://opentsdb.net/docs/build/html/api_http/put.html
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -45,6 +52,7 @@ func insertRows(rows []parser.Row) error {
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
|
||||
76
app/vmagent/prometheusimport/request_handler.go
Normal file
76
app/vmagent/prometheusimport/request_handler.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes `/api/v1/import/prometheus` request.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defaultTimestamp, err := parserCommon.GetTimestamp(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(&ctx.WriteRequest)
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -20,12 +21,18 @@ var (
|
||||
|
||||
// InsertHandler processes remote write for prometheus.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(tss, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
func insertRows(timeseries []prompb.TimeSeries, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -35,6 +42,7 @@ func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range timeseries {
|
||||
ts := ×eries[i]
|
||||
rowsTotal += len(ts.Samples)
|
||||
labelsLen := len(labels)
|
||||
for i := range ts.Labels {
|
||||
label := &ts.Labels[i]
|
||||
@@ -43,6 +51,7 @@ func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
Value: bytesutil.ToUnsafeString(label.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
samplesLen := len(samples)
|
||||
for i := range ts.Samples {
|
||||
sample := &ts.Samples[i]
|
||||
@@ -55,7 +64,6 @@ func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
rowsTotal += len(ts.Samples)
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -13,14 +16,19 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
sendTimeout = flag.Duration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to -remoteWrite.url")
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to -remoteWrite.url. "+
|
||||
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArray("remoteWrite.proxyURL", "Optional proxy URL for writing data to -remoteWrite.url. Supported proxies: http, https, socks5. "+
|
||||
"Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArray("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArray("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url. "+
|
||||
@@ -39,24 +47,51 @@ var (
|
||||
)
|
||||
|
||||
type client struct {
|
||||
urlLabelValue string
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
host string
|
||||
requestURI string
|
||||
authHeader string
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *fasthttp.HostClient
|
||||
hc *http.Client
|
||||
|
||||
rl rateLimiter
|
||||
|
||||
bytesSent *metrics.Counter
|
||||
blocksSent *metrics.Counter
|
||||
requestDuration *metrics.Histogram
|
||||
requestsOKCount *metrics.Counter
|
||||
errorsCount *metrics.Counter
|
||||
packetsDropped *metrics.Counter
|
||||
retriesCount *metrics.Counter
|
||||
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
tlsCfg, err := getTLSConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
Dial: statDial,
|
||||
TLSClientConfig: tlsCfg,
|
||||
TLSHandshakeTimeout: 5 * time.Second,
|
||||
MaxConnsPerHost: 2 * concurrency,
|
||||
MaxIdleConnsPerHost: 2 * concurrency,
|
||||
IdleConnTimeout: time.Minute,
|
||||
WriteBufferSize: 64 * 1024,
|
||||
}
|
||||
pURL := proxyURL.GetOptionalArg(argIdx)
|
||||
if len(pURL) > 0 {
|
||||
if !strings.Contains(pURL, "://") {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: it must start with `http://`, `https://` or `socks5://`", pURL)
|
||||
}
|
||||
urlProxy, err := url.Parse(pURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: %s", pURL, err)
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(urlProxy)
|
||||
}
|
||||
authHeader := ""
|
||||
username := basicAuthUsername.GetOptionalArg(argIdx)
|
||||
password := basicAuthPassword.GetOptionalArg(argIdx)
|
||||
@@ -73,68 +108,30 @@ func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentq
|
||||
}
|
||||
authHeader = "Bearer " + token
|
||||
}
|
||||
|
||||
readTimeout := *sendTimeout
|
||||
if readTimeout <= 0 {
|
||||
readTimeout = time.Minute
|
||||
}
|
||||
writeTimeout := readTimeout
|
||||
var u fasthttp.URI
|
||||
u.Update(remoteWriteURL)
|
||||
scheme := string(u.Scheme())
|
||||
switch scheme {
|
||||
case "http", "https":
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme in -remoteWrite.url=%q: %q. It must be http or https", remoteWriteURL, scheme)
|
||||
}
|
||||
host := string(u.Host())
|
||||
if len(host) == 0 {
|
||||
logger.Fatalf("invalid -remoteWrite.url=%q: host cannot be empty. Make sure the url looks like `http://host:port/path`", remoteWriteURL)
|
||||
}
|
||||
requestURI := string(u.RequestURI())
|
||||
isTLS := scheme == "https"
|
||||
var tlsCfg *tls.Config
|
||||
if isTLS {
|
||||
var err error
|
||||
tlsCfg, err = getTLSConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
|
||||
}
|
||||
}
|
||||
if !strings.Contains(host, ":") {
|
||||
if isTLS {
|
||||
host += ":443"
|
||||
} else {
|
||||
host += ":80"
|
||||
}
|
||||
}
|
||||
maxConns := 2 * concurrency
|
||||
hc := &fasthttp.HostClient{
|
||||
Addr: host,
|
||||
Name: "vmagent",
|
||||
Dial: statDial,
|
||||
IsTLS: isTLS,
|
||||
TLSConfig: tlsCfg,
|
||||
MaxConns: maxConns,
|
||||
MaxIdleConnDuration: 10 * readTimeout,
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
MaxResponseBodySize: 1024 * 1024,
|
||||
}
|
||||
c := &client{
|
||||
urlLabelValue: urlLabelValue,
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
host: host,
|
||||
requestURI: requestURI,
|
||||
authHeader: authHeader,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
stopCh: make(chan struct{}),
|
||||
hc: &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.urlLabelValue))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.urlLabelValue))
|
||||
c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.urlLabelValue))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.urlLabelValue))
|
||||
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
c.rl.perSecondLimit = int64(bytesPerSec)
|
||||
}
|
||||
c.rl.limitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remote_write_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
|
||||
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
|
||||
c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL))
|
||||
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
for i := 0; i < concurrency; i++ {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
@@ -142,27 +139,30 @@ func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentq
|
||||
c.runWorker()
|
||||
}()
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.remoteWriteURL)
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) MustStop() {
|
||||
close(c.stopCh)
|
||||
c.wg.Wait()
|
||||
logger.Infof("stopped client for -remoteWrite.url=%q", c.remoteWriteURL)
|
||||
logger.Infof("stopped client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
|
||||
func getTLSConfig(argIdx int) (*tls.Config, error) {
|
||||
tlsConfig := &promauth.TLSConfig{
|
||||
c := &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(argIdx),
|
||||
CertFile: tlsCertFile.GetOptionalArg(argIdx),
|
||||
KeyFile: tlsKeyFile.GetOptionalArg(argIdx),
|
||||
ServerName: tlsServerName.GetOptionalArg(argIdx),
|
||||
InsecureSkipVerify: *tlsInsecureSkipVerify,
|
||||
InsecureSkipVerify: tlsInsecureSkipVerify.GetOptionalArg(argIdx),
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", tlsConfig)
|
||||
if c.CAFile == "" && c.CertFile == "" && c.KeyFile == "" && c.ServerName == "" && !c.InsecureSkipVerify {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", c)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %s", err)
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %w", err)
|
||||
}
|
||||
tlsCfg := cfg.NewTLSConfig()
|
||||
return tlsCfg, nil
|
||||
@@ -193,7 +193,7 @@ func (c *client) runWorker() {
|
||||
// The block has been sent successfully.
|
||||
case <-time.After(graceDuration):
|
||||
logger.Errorf("couldn't sent block with size %d bytes to %q in %.3f seconds during shutdown; dropping it",
|
||||
len(block), c.remoteWriteURL, graceDuration.Seconds())
|
||||
len(block), c.sanitizedURL, graceDuration.Seconds())
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -201,32 +201,28 @@ func (c *client) runWorker() {
|
||||
}
|
||||
|
||||
func (c *client) sendBlock(block []byte) {
|
||||
req := fasthttp.AcquireRequest()
|
||||
req.SetRequestURI(c.requestURI)
|
||||
req.SetHost(c.host)
|
||||
req.Header.SetMethod("POST")
|
||||
req.Header.Add("Content-Type", "application/x-protobuf")
|
||||
req.Header.Add("Content-Encoding", "snappy")
|
||||
req.Header.Add("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
retriesCount := 0
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
|
||||
again:
|
||||
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexected error from http.NewRequest(%q): %s", c.sanitizedURL, err)
|
||||
}
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
if c.authHeader != "" {
|
||||
req.Header.Set("Authorization", c.authHeader)
|
||||
}
|
||||
req.SetBody(block)
|
||||
|
||||
retryDuration := time.Second
|
||||
resp := fasthttp.AcquireResponse()
|
||||
|
||||
again:
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
fasthttp.ReleaseRequest(req)
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
err := doRequestWithPossibleRetry(c.hc, req, resp)
|
||||
resp, err := c.hc.Do(req)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
@@ -235,40 +231,94 @@ again:
|
||||
retryDuration = time.Minute
|
||||
}
|
||||
logger.Errorf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.remoteWriteURL, err, retryDuration.Seconds())
|
||||
time.Sleep(retryDuration)
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
statusCode := resp.StatusCode()
|
||||
if statusCode/100 != 2 {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.urlLabelValue, statusCode)).Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q: %d; response body=%q; re-sending the block in %.3f seconds",
|
||||
len(block), c.remoteWriteURL, statusCode, resp.Body(), retryDuration.Seconds())
|
||||
time.Sleep(retryDuration)
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
c.requestsOKCount.Inc()
|
||||
statusCode := resp.StatusCode
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
return
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
if statusCode == 409 {
|
||||
// Just drop block on 409 status code like Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/873
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
logger.Errorf("unexpected status code received when sending a block with size %d bytes to %q: #%d; dropping the block like Prometheus does; "+
|
||||
"response body=%q", len(block), c.sanitizedURL, statusCode, body)
|
||||
c.packetsDropped.Inc()
|
||||
return
|
||||
}
|
||||
|
||||
// The block has been successfully sent to the remote storage.
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
fasthttp.ReleaseRequest(req)
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
}
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
|
||||
// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
|
||||
err := hc.Do(req, resp)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if err != fasthttp.ErrConnectionClosed {
|
||||
return err
|
||||
}
|
||||
// Retry request if the server closed the keep-alive connection during the first attempt.
|
||||
return hc.Do(req, resp)
|
||||
type rateLimiter struct {
|
||||
perSecondLimit int64
|
||||
|
||||
// The current budget. It is increased by perSecondLimit every second.
|
||||
budget int64
|
||||
|
||||
// The next deadline for increasing the budget by perSecondLimit
|
||||
deadline time.Time
|
||||
|
||||
limitReached *metrics.Counter
|
||||
}
|
||||
|
||||
func (rl *rateLimiter) register(dataLen int, stopCh <-chan struct{}) {
|
||||
limit := rl.perSecondLimit
|
||||
if limit <= 0 {
|
||||
return
|
||||
}
|
||||
for rl.budget <= 0 {
|
||||
now := time.Now()
|
||||
if d := rl.deadline.Sub(now); d > 0 {
|
||||
rl.limitReached.Inc()
|
||||
t := timerpool.Get(d)
|
||||
select {
|
||||
case <-stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
}
|
||||
rl.budget += limit
|
||||
rl.deadline = now.Add(time.Second)
|
||||
}
|
||||
rl.budget -= int64(dataLen)
|
||||
}
|
||||
|
||||
@@ -3,12 +3,16 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
@@ -17,7 +21,7 @@ var (
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage. "+
|
||||
"Minimum supported interval is 1 second")
|
||||
maxUnpackedBlockSize = flag.Int("remoteWrite.maxBlockSize", 32*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
|
||||
"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
|
||||
)
|
||||
|
||||
@@ -32,9 +36,11 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte)) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
ps.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
@@ -68,7 +74,7 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
case <-ps.stopCh:
|
||||
mustStop = true
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-ps.wr.lastFlushTime < uint64(flushSeconds) {
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
@@ -79,10 +85,20 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
}
|
||||
|
||||
type writeRequest struct {
|
||||
wr prompbmarshal.WriteRequest
|
||||
pushBlock func(block []byte)
|
||||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
labels []prompbmarshal.Label
|
||||
@@ -91,6 +107,8 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
for i := range wr.tss {
|
||||
@@ -100,11 +118,7 @@ func (wr *writeRequest) reset() {
|
||||
}
|
||||
wr.tss = wr.tss[:0]
|
||||
|
||||
for i := range wr.labels {
|
||||
label := &wr.labels[i]
|
||||
label.Name = ""
|
||||
label.Value = ""
|
||||
}
|
||||
promrelabel.CleanLabels(wr.labels)
|
||||
wr.labels = wr.labels[:0]
|
||||
|
||||
wr.samples = wr.samples[:0]
|
||||
@@ -113,18 +127,35 @@ func (wr *writeRequest) reset() {
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.lastFlushTime = fasttime.UnixTimestamp()
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
|
||||
tssDst := wr.tss
|
||||
for i := range src {
|
||||
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
|
||||
dst := &tssDst[len(tssDst)-1]
|
||||
wr.copyTimeSeries(dst, &src[i])
|
||||
if len(wr.tss) >= maxRowsPerBlock {
|
||||
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
|
||||
if len(wr.samples) >= maxRowsPerBlock {
|
||||
wr.tss = tssDst
|
||||
wr.flush()
|
||||
tssDst = wr.tss
|
||||
}
|
||||
@@ -164,7 +195,7 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
}
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= *maxUnpackedBlockSize {
|
||||
if len(bb.B) <= maxUnpackedBlockSize.N {
|
||||
zb := snappyBufPool.Get()
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
@@ -16,7 +16,7 @@ var (
|
||||
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple flags to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config for details")
|
||||
"before sending them to -remoteWrite.url. See https://victoriametrics.github.io/vmagent.html#relabeling for details")
|
||||
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
rcs.global = global
|
||||
}
|
||||
@@ -43,9 +43,13 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
rcs.perURL = make([][]promrelabel.ParsedRelabelConfig, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
// Skip empty relabel config.
|
||||
continue
|
||||
}
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
}
|
||||
@@ -59,9 +63,11 @@ type relabelConfigs struct {
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
func initLabelsGlobal() {
|
||||
// Init labelsGlobal
|
||||
labelsGlobal = nil
|
||||
for _, s := range *unparsedLabelsGlobal {
|
||||
if len(s) == 0 {
|
||||
continue
|
||||
}
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
logger.Fatalf("missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
|
||||
@@ -114,12 +120,7 @@ type relabelCtx struct {
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) reset() {
|
||||
labels := rctx.labels
|
||||
for i := range labels {
|
||||
label := &labels[i]
|
||||
label.Name = ""
|
||||
label.Value = ""
|
||||
}
|
||||
promrelabel.CleanLabels(rctx.labels)
|
||||
rctx.labels = rctx.labels[:0]
|
||||
}
|
||||
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
@@ -22,14 +22,21 @@ var (
|
||||
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url flags in order to write data concurrently to multiple remote storage systems")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored")
|
||||
queues = flag.Int("remoteWrite.queues", 1, "The number of concurrent queues to each -remoteWrite.url. Set more queues if a single queue "+
|
||||
queues = flag.Int("remoteWrite.queues", 4, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensistive auth info")
|
||||
maxPendingBytesPerURL = flag.Int("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flagutil.NewBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
significantFigures = flagutil.NewArrayInt("remoteWrite.significantFigures", "The number of significant figures to leave in metric values before writing them "+
|
||||
"to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. "+
|
||||
"This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits")
|
||||
roundDigits = flagutil.NewArrayInt("remoteWrite.roundDigits", "Round metric values to this number of decimal digits after the point before writing them to remote storage. "+
|
||||
"Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+
|
||||
"By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
|
||||
"This option may be used for improving data compression for the stored metrics")
|
||||
)
|
||||
|
||||
var rwctxs []*remoteWriteCtx
|
||||
@@ -37,6 +44,18 @@ var rwctxs []*remoteWriteCtx
|
||||
// Contains the current relabelConfigs.
|
||||
var allRelabelConfigs atomic.Value
|
||||
|
||||
// maxQueues limits the maximum value for `-remoteWrite.queues`. There is no sense in setting too high value,
|
||||
// since it may lead to high memory usage due to big number of buffers.
|
||||
var maxQueues = cgroup.AvailableCPUs() * 4
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging.
|
||||
func InitSecretFlags() {
|
||||
if !*showRemoteWriteURL {
|
||||
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
|
||||
flagutil.RegisterSecretFlag("remoteWrite.url")
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
@@ -44,12 +63,13 @@ var allRelabelConfigs atomic.Value
|
||||
// Stop must be called for graceful shutdown.
|
||||
func Init() {
|
||||
if len(*remoteWriteURLs) == 0 {
|
||||
logger.Fatalf("at least one `-remoteWrite.url` must be set")
|
||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||
}
|
||||
|
||||
if !*showRemoteWriteURL {
|
||||
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
|
||||
httpserver.RegisterSecretFlag("remoteWrite.url")
|
||||
if *queues > maxQueues {
|
||||
*queues = maxQueues
|
||||
}
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
initLabelsGlobal()
|
||||
rcs, err := loadRelabelConfigs()
|
||||
@@ -69,11 +89,11 @@ func Init() {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
for i, remoteWriteURL := range *remoteWriteURLs {
|
||||
urlLabelValue := fmt.Sprintf("secret-url-%d", i+1)
|
||||
sanitizedURL := fmt.Sprintf("%d:secret-url", i+1)
|
||||
if *showRemoteWriteURL {
|
||||
urlLabelValue = remoteWriteURL
|
||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||
}
|
||||
rwctx := newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, urlLabelValue)
|
||||
rwctx := newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
rwctxs = append(rwctxs, rwctx)
|
||||
}
|
||||
|
||||
@@ -118,7 +138,7 @@ func Stop() {
|
||||
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
//
|
||||
// Note that wr may be modified by Push due to relabeling.
|
||||
// Note that wr may be modified by Push due to relabeling and rounding.
|
||||
func Push(wr *prompbmarshal.WriteRequest) {
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
@@ -128,11 +148,20 @@ func Push(wr *prompbmarshal.WriteRequest) {
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce maxmimum memory usage
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
samplesCount := 0
|
||||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
i++
|
||||
if samplesCount > maxRowsPerBlock {
|
||||
break
|
||||
}
|
||||
}
|
||||
tssBlock := tss
|
||||
if len(tssBlock) > maxRowsPerBlock {
|
||||
tssBlock = tss[:maxRowsPerBlock]
|
||||
tss = tss[maxRowsPerBlock:]
|
||||
if i < len(tss) {
|
||||
tssBlock = tss[:i]
|
||||
tss = tss[i:]
|
||||
} else {
|
||||
tss = nil
|
||||
}
|
||||
@@ -162,25 +191,25 @@ type remoteWriteCtx struct {
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
relabelMetricsDropped *metrics.Counter
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int, urlLabelValue string) *remoteWriteCtx {
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||
h := xxhash.Sum64([]byte(remoteWriteURL))
|
||||
path := fmt.Sprintf("%s/persistent-queue/%016X", *tmpDataPath, h)
|
||||
fq := persistentqueue.MustOpenFastQueue(path, remoteWriteURL, maxInmemoryBlocks, *maxPendingBytesPerURL)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, path, urlLabelValue), func() float64 {
|
||||
path := fmt.Sprintf("%s/persistent-queue/%d_%016X", *tmpDataPath, argIdx+1, h)
|
||||
fq := persistentqueue.MustOpenFastQueue(path, sanitizedURL, maxInmemoryBlocks, maxPendingBytesPerURL.N)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, path, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, path, urlLabelValue), func() float64 {
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, path, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
c := newClient(argIdx, remoteWriteURL, urlLabelValue, fq, *queues)
|
||||
c := newClient(argIdx, remoteWriteURL, sanitizedURL, fq, *queues)
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pss := make([]*pendingSeries, *queues)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock)
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
}
|
||||
return &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
@@ -188,7 +217,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int,
|
||||
c: c,
|
||||
pss: pss,
|
||||
|
||||
relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, path, urlLabelValue)),
|
||||
relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, path, sanitizedURL)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,15 +237,17 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
prcs := rcs.perURL[rwctx.idx]
|
||||
if len(prcs) > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467 for details.
|
||||
rwctx.tss = append(rwctx.tss[:0], tss...)
|
||||
tss = rwctx.tss
|
||||
rctx = getRelabelCtx()
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
v = tssRelabelPool.Get().(*[]prompbmarshal.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
tssLen := len(tss)
|
||||
tss = rctx.applyRelabeling(tss, nil, prcs)
|
||||
rwctx.relabelMetricsDropped.Add(tssLen - len(tss))
|
||||
@@ -225,8 +256,15 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
// Zero rwctx.tss in order to free up GC references.
|
||||
rwctx.tss = prompbmarshal.ResetTimeSeries(rwctx.tss)
|
||||
}
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
return &a
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func statDial(addr string) (conn net.Conn, err error) {
|
||||
if netutil.TCP6Enabled() {
|
||||
conn, err = fasthttp.DialDualStack(addr)
|
||||
} else {
|
||||
conn, err = fasthttp.Dial(addr)
|
||||
func statDial(network, addr string) (conn net.Conn, err error) {
|
||||
if !strings.HasPrefix(network, "tcp") {
|
||||
return nil, fmt.Errorf("unexpected network passed to statDial: %q; it must start from `tcp`", network)
|
||||
}
|
||||
if !netutil.TCP6Enabled() {
|
||||
network = "tcp4"
|
||||
}
|
||||
conn, err = net.DialTimeout(network, addr, 5*time.Second)
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
|
||||
@@ -6,7 +6,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -21,12 +23,18 @@ var (
|
||||
//
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
|
||||
func InsertHandler(req *http.Request) error {
|
||||
extraLabels, err := parserCommon.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, insertRows)
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -36,6 +44,7 @@ func insertRows(rows []parser.Row) error {
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
rowsTotal += len(r.Values)
|
||||
labelsLen := len(labels)
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
@@ -44,9 +53,12 @@ func insertRows(rows []parser.Row) error {
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
values := r.Values
|
||||
timestamps := r.Timestamps
|
||||
_ = timestamps[len(values)-1]
|
||||
if len(timestamps) != len(values) {
|
||||
logger.Panicf("BUG: len(timestamps)=%d must match len(values)=%d", len(timestamps), len(values))
|
||||
}
|
||||
samplesLen := len(samples)
|
||||
for j, value := range values {
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
@@ -58,7 +70,6 @@ func insertRows(rows []parser.Row) error {
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
rowsTotal += len(values)
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
|
||||
@@ -61,24 +61,30 @@ run-vmalert: vmalert
|
||||
./bin/vmalert -rule=app/vmalert/config/testdata/rules2-good.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093 \
|
||||
-notifier.url=http://127.0.0.1:9093 \
|
||||
-remoteWrite.url=http://localhost:8428 \
|
||||
-remoteRead.url=http://localhost:8428 \
|
||||
-external.label=cluster=east-1 \
|
||||
-external.label=replica=a \
|
||||
-evaluationInterval=3s
|
||||
|
||||
vmalert-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-amd64 ./app/vmalert
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm64 ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-ppc64le ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-386 ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-local-with-goarch:
|
||||
APP_NAME=vmalert $(MAKE) app-local-with-goarch
|
||||
|
||||
vmalert-pure:
|
||||
APP_NAME=vmalert $(MAKE) app-local-pure
|
||||
|
||||
@@ -6,11 +6,13 @@ rules against configured address.
|
||||
|
||||
### Features:
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
|
||||
* VictoriaMetrics [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
|
||||
* VictoriaMetrics [MetricsQL](https://victoriametrics.github.io/MetricsQL.html)
|
||||
support and expressions validation;
|
||||
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
|
||||
* Keeps the alerts [state on restarts](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmalert#alerts-state-on-restarts);
|
||||
* Graphite datasource can be used for alerting and recording rules. See [these docs](#graphite) for details.
|
||||
* Lightweight without extra dependencies.
|
||||
|
||||
### Limitations:
|
||||
@@ -20,7 +22,6 @@ may fail;
|
||||
* by default, rules execution is sequential within one group, but persisting of execution results to remote
|
||||
storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
|
||||
recording rule is reused in next one;
|
||||
* there is no `query` function support in templates yet;
|
||||
* `vmalert` has no UI, just an API for getting groups and rules statuses.
|
||||
|
||||
### QuickStart
|
||||
@@ -44,10 +45,19 @@ compatible storage address for storing recording rules results and alerts state
|
||||
Then configure `vmalert` accordingly:
|
||||
```
|
||||
./bin/vmalert -rule=alert.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093
|
||||
-datasource.url=http://localhost:8428 \ # PromQL compatible datasource
|
||||
-notifier.url=http://localhost:9093 \ # AlertManager URL
|
||||
-notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL
|
||||
-remoteWrite.url=http://localhost:8428 \ # remote write compatible storage to persist rules
|
||||
-remoteRead.url=http://localhost:8428 \ # PromQL compatible datasource to restore alerts state from
|
||||
-external.label=cluster=east-1 \ # External label to be applied for each rule
|
||||
-external.label=replica=a \ # Multiple external labels may be set
|
||||
-evaluationInterval=3s # Default evaluation interval if not specified in rules group
|
||||
```
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
@@ -80,7 +90,7 @@ rules:
|
||||
|
||||
There are two types of Rules:
|
||||
* [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) -
|
||||
Alerting rules allows to define alert conditions via [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
|
||||
Alerting rules allows to define alert conditions via [MetricsQL](https://victoriametrics.github.io/MetricsQL.html)
|
||||
and to send notifications about firing alerts to [Alertmanager](https://github.com/prometheus/alertmanager).
|
||||
* [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) -
|
||||
Recording rules allow you to precompute frequently needed or computationally expensive expressions
|
||||
@@ -96,7 +106,13 @@ The syntax for alerting rule is following:
|
||||
# The name of the alert. Must be a valid metric name.
|
||||
alert: <string>
|
||||
|
||||
# The MetricsQL expression to evaluate.
|
||||
# Optional type for the rule. Supported values: "graphite", "prometheus".
|
||||
# By default "prometheus" rule type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default MetricsQL expression is used. If type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Alerts are considered firing once they have been returned for this long.
|
||||
@@ -112,14 +128,6 @@ annotations:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
```
|
||||
|
||||
`vmalert` has no local storage and alerts state is stored in process memory. Hence, after reloading of `vmalert` process
|
||||
alerts state will be lost. To avoid this situation, `vmalert` may be configured via following flags:
|
||||
* `-remoteWrite.url` - URL to Victoria Metrics or VMInsert. `vmalert` will persist alerts state into the configured
|
||||
address in form of timeseries with name `ALERTS` via remote-write protocol.
|
||||
* `-remoteRead.url` - URL to Victoria Metrics or VMSelect. `vmalert` will try to restore alerts state from configured
|
||||
address by querying `ALERTS` timeseries.
|
||||
|
||||
|
||||
##### Recording rules
|
||||
|
||||
The syntax for recording rules is following:
|
||||
@@ -127,7 +135,13 @@ The syntax for recording rules is following:
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The MetricsQL expression to evaluate.
|
||||
# Optional type for the rule. Supported values: "graphite", "prometheus".
|
||||
# By default "prometheus" rule type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default MetricsQL expression is used. If type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite before storing the result.
|
||||
@@ -138,6 +152,22 @@ labels:
|
||||
For recording rules to work `-remoteWrite.url` must specified.
|
||||
|
||||
|
||||
#### Alerts state on restarts
|
||||
|
||||
`vmalert` has no local storage, so alerts state is stored in the process memory. Hence, after reloading of `vmalert`
|
||||
the process alerts state will be lost. To avoid this situation, `vmalert` should be configured via the following flags:
|
||||
* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or VMInsert (Cluster). `vmalert` will persist alerts state
|
||||
into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via remote-write protocol.
|
||||
These are regular time series and may be queried from VM just as any other time series.
|
||||
The state stored to the configured address on every rule evaluation.
|
||||
* `-remoteRead.url` - URL to VictoriaMetrics (Single) or VMSelect (Cluster). `vmalert` will try to restore alerts state
|
||||
from configured address by querying time series with name `ALERTS_FOR_STATE`.
|
||||
|
||||
Both flags are required for the proper state restoring. Restore process may fail if time series are missing
|
||||
in configured `-remoteRead.url`, weren't updated in the last `1h` or received state doesn't match current `vmalert`
|
||||
rules configuration.
|
||||
|
||||
|
||||
#### WEB
|
||||
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
@@ -149,98 +179,181 @@ Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
|
||||
|
||||
|
||||
### Graphite
|
||||
|
||||
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
|
||||
if the corresponding rule contains `type: "graphite"` config option. It is expected that the `<-datasource.url>/render`
|
||||
implements [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) for `format=json`.
|
||||
|
||||
|
||||
### Configuration
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
```
|
||||
Usage of vmalert:
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password for -datasource.url
|
||||
Optional basic auth password for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used.
|
||||
-datasource.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url.
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.lookback duration
|
||||
Lookback defines how far to look into past when evaluating queries. For example, if datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
-datasource.maxIdleConnections int
|
||||
Defines the number of idle (keep-alive connections) to configured datasource.Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state. (default 100)
|
||||
-datasource.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used
|
||||
-datasource.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url
|
||||
-datasource.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url.
|
||||
-datasource.tlsServerName value
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used.
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url
|
||||
-datasource.tlsServerName string
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
-dryRun -rule
|
||||
Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-evaluationInterval duration
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
-external.alert.source string
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used
|
||||
-external.label array
|
||||
Optional label in the form 'name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-external.url string
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this dealy the servier returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
Address to listen for http connections (default ":8880")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit
|
||||
-memory.allowedBytes value
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-notifier.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used.
|
||||
-notifier.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url.
|
||||
-notifier.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -notifier.url
|
||||
-notifier.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url.
|
||||
-notifier.tlsServerName value
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used.
|
||||
-notifier.url string
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-notifier.basicAuth.password array
|
||||
Optional basic auth password for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.basicAuth.username array
|
||||
Optional basic auth username for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsInsecureSkipVerify array
|
||||
Whether to skip tls verification when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsServerName array
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.url array
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-remoteRead.basicAuth.password string
|
||||
Optional basic auth password for -remoteRead.url
|
||||
Optional basic auth password for -remoteRead.url
|
||||
-remoteRead.basicAuth.username string
|
||||
Optional basic auth username for -remoteRead.url
|
||||
Optional basic auth username for -remoteRead.url
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used.
|
||||
-remoteRead.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url.
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used
|
||||
-remoteRead.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -remoteRead.url
|
||||
-remoteRead.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url.
|
||||
-remoteRead.tlsServerName value
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used.
|
||||
Whether to skip tls verification when connecting to -remoteRead.url
|
||||
-remoteRead.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used
|
||||
-remoteRead.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remoteWrite.basicAuth.password string
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
-remoteWrite.basicAuth.username string
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
-remoteWrite.concurrency int
|
||||
Defines number of readers that concurrently write into remote storage (default 1)
|
||||
Defines number of writers for concurrent writing into remote querier (default 1)
|
||||
-remoteWrite.flushInterval duration
|
||||
Defines interval of flushes to remote write endpoint (default 5s)
|
||||
-remoteWrite.maxBatchSize int
|
||||
Defines defines max number of timeseries to be flushed at once (default 1000)
|
||||
Defines defines max number of timeseries to be flushed at once (default 1000)
|
||||
-remoteWrite.maxQueueSize int
|
||||
Defines the max number of pending datapoints to remote write endpoint (default 100000)
|
||||
-remoteWrite.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used.
|
||||
-remoteWrite.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url.
|
||||
Defines the max number of pending datapoints to remote write endpoint (default 100000)
|
||||
-remoteWrite.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used
|
||||
-remoteWrite.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url.
|
||||
-remoteWrite.tlsServerName value
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used.
|
||||
Whether to skip tls verification when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
|
||||
-remoteWrite.url string
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule value
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule array
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-rule.validateExpressions
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
-rule.validateTemplates
|
||||
Whether to validate annotation and label templates (default true)
|
||||
Whether to validate annotation and label templates (default true)
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
@@ -273,3 +386,20 @@ It is recommended using
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-prod` from the root folder of the repository.
|
||||
It builds `vmalert-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
|
||||
#### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert-arm` or `make vmalert-arm64` from the root folder of the repository.
|
||||
It builds `vmalert-arm` or `vmalert-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-arm-prod` or `make vmalert-arm64-prod` from the root folder of the repository.
|
||||
It builds `vmalert-arm-prod` or `vmalert-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
@@ -14,10 +14,12 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// AlertingRule is basic alert entity
|
||||
type AlertingRule struct {
|
||||
Type datasource.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
@@ -25,6 +27,7 @@ type AlertingRule struct {
|
||||
Labels map[string]string
|
||||
Annotations map[string]string
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
|
||||
// guard status fields
|
||||
mu sync.RWMutex
|
||||
@@ -36,19 +39,73 @@ type AlertingRule struct {
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
|
||||
metrics *alertingRuleMetrics
|
||||
}
|
||||
|
||||
func newAlertingRule(gID uint64, cfg config.Rule) *AlertingRule {
|
||||
return &AlertingRule{
|
||||
type alertingRuleMetrics struct {
|
||||
errors *gauge
|
||||
pending *gauge
|
||||
active *gauge
|
||||
}
|
||||
|
||||
func newAlertingRule(group *Group, cfg config.Rule) *AlertingRule {
|
||||
ar := &AlertingRule{
|
||||
Type: cfg.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Alert,
|
||||
Expr: cfg.Expr,
|
||||
For: cfg.For,
|
||||
For: cfg.For.Duration(),
|
||||
Labels: cfg.Labels,
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: gID,
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StatePending {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.active = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_firing{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateFiring {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_error{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
if ar.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
return ar
|
||||
}
|
||||
|
||||
// Close unregisters rule metrics
|
||||
func (ar *AlertingRule) Close() {
|
||||
metrics.UnregisterMetric(ar.metrics.active.name)
|
||||
metrics.UnregisterMetric(ar.metrics.pending.name)
|
||||
metrics.UnregisterMetric(ar.metrics.errors.name)
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
@@ -65,14 +122,14 @@ func (ar *AlertingRule) ID() uint64 {
|
||||
// Exec executes AlertingRule expression via the given Querier.
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series bool) ([]prompbmarshal.TimeSeries, error) {
|
||||
qMetrics, err := q.Query(ctx, ar.Expr)
|
||||
qMetrics, err := q.Query(ctx, ar.Expr, ar.Type)
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
|
||||
ar.lastExecError = err
|
||||
ar.lastExecTime = time.Now()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %s", ar.Expr, err)
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
for h, a := range ar.alerts {
|
||||
@@ -82,28 +139,44 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||
}
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query, ar.Type) }
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
// extra labels could contain templates, so we expand them first
|
||||
labels, err := expandLabels(m, qFn, ar)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
for k, v := range labels {
|
||||
// apply extra labels to datasource
|
||||
// so the hash key will be consistent on restore
|
||||
m.SetLabel(k, v)
|
||||
}
|
||||
h := hash(m)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
return nil, fmt.Errorf("labels %v: %w", m.Labels, errDuplicate)
|
||||
}
|
||||
updated[h] = struct{}{}
|
||||
if a, ok := ar.alerts[h]; ok {
|
||||
if a.Value != m.Value {
|
||||
// update Value field with latest value
|
||||
a.Value = m.Value
|
||||
// and re-exec template since Value can be used
|
||||
// in templates
|
||||
err = ar.template(a)
|
||||
// in annotations
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
a, err := ar.newAlert(m, ar.lastExecTime)
|
||||
a, err := ar.newAlert(m, ar.lastExecTime, qFn)
|
||||
if err != nil {
|
||||
ar.lastExecError = err
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
@@ -134,6 +207,19 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func expandLabels(m datasource.Metric, q notifier.QueryFn, ar *AlertingRule) (map[string]string, error) {
|
||||
metricLabels := make(map[string]string)
|
||||
for _, l := range m.Labels {
|
||||
metricLabels[l.Name] = l.Value
|
||||
}
|
||||
tpl := notifier.AlertTplData{
|
||||
Labels: metricLabels,
|
||||
Value: m.Value,
|
||||
Expr: ar.Expr,
|
||||
}
|
||||
return notifier.ExecTemplate(q, ar.Labels, tpl)
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) toTimeSeries(timestamp time.Time) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
for _, a := range ar.alerts {
|
||||
@@ -180,7 +266,7 @@ func hash(m datasource.Metric) uint64 {
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifier.Alert, error) {
|
||||
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time, qFn notifier.QueryFn) (*notifier.Alert, error) {
|
||||
a := ¬ifier.Alert{
|
||||
GroupID: ar.GroupID,
|
||||
Name: ar.Name,
|
||||
@@ -189,6 +275,9 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifie
|
||||
Start: start,
|
||||
Expr: ar.Expr,
|
||||
}
|
||||
// label defined here to make override possible by
|
||||
// time series labels.
|
||||
a.Labels[alertGroupNameLabel] = ar.GroupName
|
||||
for _, l := range m.Labels {
|
||||
// drop __name__ to be consistent with Prometheus alerting
|
||||
if l.Name == "__name__" {
|
||||
@@ -196,31 +285,9 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifie
|
||||
}
|
||||
a.Labels[l.Name] = l.Value
|
||||
}
|
||||
return a, ar.template(a)
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) template(a *notifier.Alert) error {
|
||||
// 1. template rule labels with data labels
|
||||
rLabels, err := a.ExecTemplate(ar.Labels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 2. merge data labels and rule labels
|
||||
// metric labels may be overridden by
|
||||
// rule labels
|
||||
for k, v := range rLabels {
|
||||
a.Labels[k] = v
|
||||
}
|
||||
|
||||
// 3. template merged labels
|
||||
a.Labels, err = a.ExecTemplate(a.Labels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
a.Annotations, err = a.ExecTemplate(ar.Annotations)
|
||||
return err
|
||||
var err error
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
|
||||
return a, err
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
@@ -245,6 +312,7 @@ func (ar *AlertingRule) RuleAPI() APIAlertingRule {
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
Type: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Expression: ar.Expr,
|
||||
For: ar.For.String(),
|
||||
@@ -283,15 +351,18 @@ func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
|
||||
}
|
||||
|
||||
const (
|
||||
// AlertMetricName is the metric name for synthetic alert timeseries.
|
||||
// alertMetricName is the metric name for synthetic alert timeseries.
|
||||
alertMetricName = "ALERTS"
|
||||
// AlertForStateMetricName is the metric name for 'for' state of alert.
|
||||
// alertForStateMetricName is the metric name for 'for' state of alert.
|
||||
alertForStateMetricName = "ALERTS_FOR_STATE"
|
||||
|
||||
// AlertNameLabel is the label name indicating the name of an alert.
|
||||
// alertNameLabel is the label name indicating the name of an alert.
|
||||
alertNameLabel = "alertname"
|
||||
// AlertStateLabel is the label name indicating the state of an alert.
|
||||
// alertStateLabel is the label name indicating the state of an alert.
|
||||
alertStateLabel = "alertstate"
|
||||
|
||||
// alertGroupNameLabel defines the label name attached for generated time series.
|
||||
alertGroupNameLabel = "alertgroup"
|
||||
)
|
||||
|
||||
// alertToTimeSeries converts the given alert with the given timestamp to timeseries
|
||||
@@ -331,16 +402,25 @@ func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) p
|
||||
// Restore restores only Start field. Field State will be always Pending and supposed
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
// Only rules with For > 0 will be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("querier is nil")
|
||||
}
|
||||
// Get the last datapoint in range via MetricsQL `last_over_time`.
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query, ar.Type) }
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
for k, v := range labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
|
||||
// Get the last data point in range via MetricsQL `last_over_time`.
|
||||
// We don't use plain PromQL since Prometheus doesn't support
|
||||
// remote write protocol which is used for state persistence in vmalert.
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q}[%ds])",
|
||||
alertForStateMetricName, ar.Name, int(lookback.Seconds()))
|
||||
qMetrics, err := q.Query(ctx, expr)
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, err := q.Query(ctx, expr, ar.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -349,26 +429,22 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
labels := m.Labels
|
||||
m.Labels = make([]datasource.Label, 0)
|
||||
// drop all extra labels, so hash key will
|
||||
// be identical to timeseries received in Exec
|
||||
// be identical to time series received in Exec
|
||||
for _, l := range labels {
|
||||
if l.Name == alertNameLabel {
|
||||
continue
|
||||
}
|
||||
// drop all overridden labels
|
||||
if _, ok := ar.Labels[l.Name]; ok {
|
||||
if l.Name == alertNameLabel || l.Name == alertGroupNameLabel {
|
||||
continue
|
||||
}
|
||||
m.Labels = append(m.Labels, l)
|
||||
}
|
||||
|
||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0))
|
||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0), qFn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create alert: %s", err)
|
||||
return fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
a.ID = hash(m)
|
||||
a.State = notifier.StatePending
|
||||
ar.alerts[a.ID] = a
|
||||
logger.Infof("alert %q(%d) restored to state at %v", a.Name, a.ID, a.Start)
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.Start)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,9 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -218,19 +221,6 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("duplicate", 0),
|
||||
[][]datasource.Metric{
|
||||
{
|
||||
// metrics with the same labelset should result in one alert
|
||||
metricWithLabels(t, "name", "foo", "type", "bar"),
|
||||
metricWithLabels(t, "type", "bar", "name", "foo"),
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "name", "foo", "type", "bar")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Minute),
|
||||
[][]datasource.Metric{
|
||||
@@ -355,6 +345,7 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
alertNameLabel, "",
|
||||
alertGroupNameLabel, "groupID",
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
),
|
||||
@@ -375,7 +366,7 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
alertNameLabel, "",
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
// following pair supposed to be dropped
|
||||
// extra labels set by rule
|
||||
"source", "vm",
|
||||
),
|
||||
},
|
||||
@@ -383,6 +374,7 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
hash(metricWithLabels(t,
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
"source", "vm",
|
||||
)): {State: notifier.StatePending,
|
||||
Start: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
@@ -419,7 +411,7 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.add(tc.metrics...)
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour); err != nil {
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour, nil); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
@@ -441,6 +433,138 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
ar := newTestAlertingRule("test", 0)
|
||||
ar.Labels = map[string]string{"job": "test"}
|
||||
|
||||
// successful attempt
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
_, err := ar.Exec(context.TODO(), fq, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// label `job` will collide with rule extra label and will make both time series equal
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||
_, err = ar.Exec(context.TODO(), fq, false)
|
||||
if !errors.Is(err, errDuplicate) {
|
||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||
}
|
||||
|
||||
fq.reset()
|
||||
|
||||
expErr := "connection reset by peer"
|
||||
fq.setErr(errors.New(expErr))
|
||||
_, err = ar.Exec(context.TODO(), fq, false)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get err; got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Template(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
metrics []datasource.Metric
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestRuleWithLabels("common", "region", "east"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
|
||||
Annotations: map[string]string{},
|
||||
Labels: map[string]string{
|
||||
alertGroupNameLabel: "",
|
||||
"region": "east",
|
||||
"instance": "foo",
|
||||
},
|
||||
},
|
||||
hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
|
||||
Annotations: map[string]string{},
|
||||
Labels: map[string]string{
|
||||
alertGroupNameLabel: "",
|
||||
"region": "east",
|
||||
"instance": "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
&AlertingRule{
|
||||
Name: "override label",
|
||||
Labels: map[string]string{
|
||||
"instance": "{{ $labels.instance }}",
|
||||
"region": "east",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Too high connection number for "{{ $labels.instance }}" for region {{ $labels.region }}`,
|
||||
"description": `It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 10, "instance", "bar"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
|
||||
Labels: map[string]string{
|
||||
alertGroupNameLabel: "",
|
||||
"instance": "foo",
|
||||
"region": "east",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Too high connection number for "foo" for region east`,
|
||||
"description": `It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
|
||||
Labels: map[string]string{
|
||||
alertGroupNameLabel: "",
|
||||
"instance": "bar",
|
||||
"region": "east",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Too high connection number for "bar" for region east`,
|
||||
"description": `It is 10 connections for "bar"`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), fq, false); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
for hash, expAlert := range tc.expAlerts {
|
||||
gotAlert := tc.rule.alerts[hash]
|
||||
if gotAlert == nil {
|
||||
t.Fatalf("alert %d is missing; labels: %v; annotations: %v",
|
||||
hash, expAlert.Labels, expAlert.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
|
||||
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(expAlert.Labels, gotAlert.Labels) {
|
||||
t.Fatalf("expected to have labels %#v; got %#v", expAlert.Labels, gotAlert.Labels)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
|
||||
r := newTestAlertingRule(name, 0)
|
||||
r.Labels = make(map[string]string)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io/ioutil"
|
||||
@@ -9,7 +10,12 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
@@ -17,16 +23,49 @@ import (
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type datasource.Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval time.Duration `yaml:"interval,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
// Checksum stores the hash of yaml definition for this group.
|
||||
// May be used to detect any changes like rules re-ordering etc.
|
||||
Checksum string
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type group Group
|
||||
if err := unmarshal((*group)(g)); err != nil {
|
||||
return err
|
||||
}
|
||||
b, err := yaml.Marshal(g)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
|
||||
}
|
||||
// change default value to prometheus datasource.
|
||||
if g.Type.Get() == "" {
|
||||
g.Type.Set(datasource.NewPrometheusType())
|
||||
}
|
||||
// update rules with empty type.
|
||||
for i, r := range g.Rules {
|
||||
if r.Type.Get() == "" {
|
||||
r.Type.Set(g.Type)
|
||||
r.ID = HashRule(r)
|
||||
g.Rules[i] = r
|
||||
}
|
||||
}
|
||||
|
||||
h := md5.New()
|
||||
h.Write(b)
|
||||
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate check for internal Group or Rule configuration errors
|
||||
func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
if g.Name == "" {
|
||||
@@ -35,6 +74,7 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
if len(g.Rules) == 0 {
|
||||
return fmt.Errorf("group %q can't contain no rules", g.Name)
|
||||
}
|
||||
|
||||
uniqueRules := map[uint64]struct{}{}
|
||||
for _, r := range g.Rules {
|
||||
ruleName := r.Record
|
||||
@@ -46,19 +86,25 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
}
|
||||
uniqueRules[r.ID] = struct{}{}
|
||||
if err := r.Validate(); err != nil {
|
||||
return fmt.Errorf("invalid rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if validateExpressions {
|
||||
if _, err := metricsql.Parse(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
// its needed only for tests.
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if r.Type.Get() != "" {
|
||||
exprValidator = r.Type.ValidateExpr
|
||||
}
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateAnnotations {
|
||||
if err := notifier.ValidateTemplates(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if err := notifier.ValidateTemplates(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -69,10 +115,11 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
// recording rule or alerting rule.
|
||||
type Rule struct {
|
||||
ID uint64
|
||||
Type datasource.Type `yaml:"type,omitempty"`
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
For time.Duration `yaml:"for,omitempty"`
|
||||
For PromDuration `yaml:"for"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
|
||||
@@ -80,6 +127,42 @@ type Rule struct {
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
||||
// PromDuration is Prometheus duration.
|
||||
type PromDuration struct {
|
||||
milliseconds int64
|
||||
}
|
||||
|
||||
// NewPromDuration returns PromDuration for given d.
|
||||
func NewPromDuration(d time.Duration) PromDuration {
|
||||
return PromDuration{
|
||||
milliseconds: d.Milliseconds(),
|
||||
}
|
||||
}
|
||||
|
||||
// MarshalYAML implements yaml.Marshaler interface.
|
||||
func (pd PromDuration) MarshalYAML() (interface{}, error) {
|
||||
return pd.Duration().String(), nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements yaml.Unmarshaler interface.
|
||||
func (pd *PromDuration) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
ms, err := metricsql.DurationValue(s, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pd.milliseconds = ms
|
||||
return nil
|
||||
}
|
||||
|
||||
// Duration returns duration for pd.
|
||||
func (pd *PromDuration) Duration() time.Duration {
|
||||
return time.Duration(pd.milliseconds) * time.Millisecond
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type rule Rule
|
||||
@@ -90,8 +173,16 @@ func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Name returns Rule name according to its type
|
||||
func (r *Rule) Name() string {
|
||||
if r.Record != "" {
|
||||
return r.Record
|
||||
}
|
||||
return r.Alert
|
||||
}
|
||||
|
||||
// HashRule hashes significant Rule fields into
|
||||
// unique hash value
|
||||
// unique hash that supposed to define Rule uniqueness
|
||||
func HashRule(r Rule) uint64 {
|
||||
h := fnv.New64a()
|
||||
h.Write([]byte(r.Expr))
|
||||
@@ -102,16 +193,8 @@ func HashRule(r Rule) uint64 {
|
||||
h.Write([]byte("alerting"))
|
||||
h.Write([]byte(r.Alert))
|
||||
}
|
||||
type item struct {
|
||||
key, value string
|
||||
}
|
||||
var kv []item
|
||||
for k, v := range r.Labels {
|
||||
kv = append(kv, item{key: k, value: v})
|
||||
}
|
||||
sort.Slice(kv, func(i, j int) bool {
|
||||
return kv[i].key < kv[j].key
|
||||
})
|
||||
h.Write([]byte(r.Type.Get()))
|
||||
kv := sortMap(r.Labels)
|
||||
for _, i := range kv {
|
||||
h.Write([]byte(i.key))
|
||||
h.Write([]byte(i.value))
|
||||
@@ -137,31 +220,38 @@ func Parse(pathPatterns []string, validateAnnotations, validateExpressions bool)
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %v", pattern, err)
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
}
|
||||
errGroup := new(utils.ErrGroup)
|
||||
var groups []Group
|
||||
for _, file := range fp {
|
||||
uniqueGroups := map[string]struct{}{}
|
||||
gr, err := parseFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse file %q: %w", file, err)
|
||||
errGroup.Add(fmt.Errorf("failed to parse file %q: %w", file, err))
|
||||
continue
|
||||
}
|
||||
for _, g := range gr {
|
||||
if err := g.Validate(validateAnnotations, validateExpressions); err != nil {
|
||||
return nil, fmt.Errorf("invalid group %q in file %q: %s", g.Name, file, err)
|
||||
errGroup.Add(fmt.Errorf("invalid group %q in file %q: %w", g.Name, file, err))
|
||||
continue
|
||||
}
|
||||
if _, ok := uniqueGroups[g.Name]; ok {
|
||||
return nil, fmt.Errorf("group name %q duplicate in file %q", g.Name, file)
|
||||
errGroup.Add(fmt.Errorf("group name %q duplicate in file %q", g.Name, file))
|
||||
continue
|
||||
}
|
||||
uniqueGroups[g.Name] = struct{}{}
|
||||
g.File = file
|
||||
groups = append(groups, g)
|
||||
}
|
||||
}
|
||||
if err := errGroup.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
return nil, fmt.Errorf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
logger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
}
|
||||
return groups, nil
|
||||
}
|
||||
@@ -171,6 +261,7 @@ func parseFile(path string) ([]Group, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file: %w", err)
|
||||
}
|
||||
data = envtemplate.Replace(data)
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
@@ -193,3 +284,18 @@ func checkOverflow(m map[string]interface{}, ctx string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type item struct {
|
||||
key, value string
|
||||
}
|
||||
|
||||
func sortMap(m map[string]string) []item {
|
||||
var kv []item
|
||||
for k, v := range m {
|
||||
kv = append(kv, item{key: k, value: v})
|
||||
}
|
||||
sort.Slice(kv, func(i, j int) bool {
|
||||
return kv[i].key < kv[j].key
|
||||
})
|
||||
return kv
|
||||
}
|
||||
|
||||
@@ -7,6 +7,10 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
@@ -41,7 +45,7 @@ func TestParseBad(t *testing.T) {
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules2-bad.rules"},
|
||||
"function \"value\" not defined",
|
||||
"function \"unknown\" not defined",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules3-bad.rules"},
|
||||
@@ -52,8 +56,8 @@ func TestParseBad(t *testing.T) {
|
||||
"either `record` or `alert` must be set",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/*.yaml"},
|
||||
"no groups found",
|
||||
[]string{"testdata/rules1-bad.rules"},
|
||||
"bad graphite expr",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
@@ -70,13 +74,13 @@ func TestParseBad(t *testing.T) {
|
||||
|
||||
func TestRule_Validate(t *testing.T) {
|
||||
if err := (&Rule{}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty name error")
|
||||
t.Errorf("expected empty name error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty expr error")
|
||||
t.Errorf("expected empty expr error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Errorf("exptected valid rule; got %s", err)
|
||||
t.Errorf("expected valid rule; got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,12 +144,14 @@ func TestGroup_Validate(t *testing.T) {
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
"summary": `
|
||||
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
|
||||
{{ . | first | value | humanize1024 }}B
|
||||
{{ end }}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "error parsing annotation",
|
||||
validateAnnotations: true,
|
||||
},
|
||||
{
|
||||
@@ -215,6 +221,75 @@ func TestGroup_Validate(t *testing.T) {
|
||||
},
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test thanos",
|
||||
Type: datasource.NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "unknown datasource type",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test graphite",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test prometheus",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
},
|
||||
validateExpressions: true,
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite inherit",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
For: PromDuration{milliseconds: 10},
|
||||
},
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
group: &Group{
|
||||
Name: "test graphite prometheus bad expr",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: PromDuration{milliseconds: 10},
|
||||
},
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "invalid rule",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
err := tc.group.Validate(tc.validateAnnotations, tc.validateExpressions)
|
||||
@@ -273,7 +348,7 @@ func TestHashRule(t *testing.T) {
|
||||
true,
|
||||
},
|
||||
{
|
||||
Rule{Alert: "alert", Expr: "up == 1", For: time.Minute},
|
||||
Rule{Alert: "alert", Expr: "up == 1", For: NewPromDuration(time.Minute)},
|
||||
Rule{Alert: "alert", Expr: "up == 1"},
|
||||
true,
|
||||
},
|
||||
@@ -324,3 +399,57 @@ func TestHashRule(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroupChecksum(t *testing.T) {
|
||||
f := func(t *testing.T, data, newData string) {
|
||||
t.Helper()
|
||||
var g Group
|
||||
if err := yaml.Unmarshal([]byte(data), &g); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %s", err)
|
||||
}
|
||||
if g.Checksum == "" {
|
||||
t.Fatalf("expected to get non-empty checksum")
|
||||
}
|
||||
|
||||
var ng Group
|
||||
if err := yaml.Unmarshal([]byte(newData), &ng); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %s", err)
|
||||
}
|
||||
if g.Checksum == ng.Checksum {
|
||||
t.Fatalf("expected to get different checksums")
|
||||
}
|
||||
}
|
||||
t.Run("Ok", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
expr: sum by(job) (up == 1)
|
||||
- record: handler:requests:rate5m
|
||||
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- record: handler:requests:rate5m
|
||||
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
expr: sum by(job) (up == 1)
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("Ok, `for` must change cs", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: ExampleAlertWithFor
|
||||
expr: sum by(job) (up == 1)
|
||||
for: 5m
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: ExampleAlertWithFor
|
||||
expr: sum by(job) (up == 1)
|
||||
`)
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
13
app/vmalert/config/testdata/dir/rules-update0-good.rules
vendored
Normal file
13
app/vmalert/config/testdata/dir/rules-update0-good.rules
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
groups:
|
||||
- name: TestUpdateGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
rules:
|
||||
- alert: up
|
||||
expr: up == 0
|
||||
for: 30s
|
||||
- alert: up graphite
|
||||
expr: filterSeries(time('host.1',20),'>','0')
|
||||
for: 30s
|
||||
type: graphite
|
||||
12
app/vmalert/config/testdata/dir/rules-update1-good.rules
vendored
Normal file
12
app/vmalert/config/testdata/dir/rules-update1-good.rules
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
groups:
|
||||
- name: TestUpdateGroup
|
||||
interval: 30s
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: up
|
||||
expr: filterSeries(time('host.2',20),'>','0')
|
||||
for: 30s
|
||||
- alert: up graphite
|
||||
expr: filterSeries(time('host.1',20),'>','0')
|
||||
for: 30s
|
||||
type: graphite
|
||||
@@ -6,6 +6,6 @@ groups:
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
summary: "{{ value|query }}"
|
||||
summary: "{{ unknown|query }}"
|
||||
annotations:
|
||||
description: "{{$labels}}"
|
||||
|
||||
4
app/vmalert/config/testdata/kube-good.rules
vendored
4
app/vmalert/config/testdata/kube-good.rules
vendored
@@ -665,7 +665,7 @@
|
||||
/
|
||||
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
|
||||
> 0.01
|
||||
for: 15m
|
||||
for: 1d
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
@@ -1724,4 +1724,4 @@
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
severity: warning
|
||||
|
||||
12
app/vmalert/config/testdata/rules1-bad.rules
vendored
Normal file
12
app/vmalert/config/testdata/rules1-bad.rules
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
groups:
|
||||
- name: TestGraphiteBadGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500) by instance
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
13
app/vmalert/config/testdata/rules2-good.rules
vendored
13
app/vmalert/config/testdata/rules2-good.rules
vendored
@@ -7,11 +7,22 @@ groups:
|
||||
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: "Too high connection number for {{$labels.instance}}"
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
{{ with printf "sum(vm_tcplistener_conns{instance=%q})" .Labels.instance | query }}
|
||||
{{ . | first | value }}
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
labels:
|
||||
job: '{{ $labels.job }}'
|
||||
dynamic: '{{ $x := query "up" | first | value }}{{ if eq 1.0 $x }}one{{ else }}unknown{{ end }}'
|
||||
annotations:
|
||||
description: Job {{ $labels.job }} is up!
|
||||
summary: All instances up {{ range query "up" }}
|
||||
{{ . | label "instance" }}
|
||||
{{ end }}
|
||||
- record: handler:requests:rate5m
|
||||
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
||||
labels:
|
||||
|
||||
30
app/vmalert/config/testdata/rules3-good.rules
vendored
Normal file
30
app/vmalert/config/testdata/rules3-good.rules
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
groups:
|
||||
- name: TestGroup
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- name: TestGroupPromMixed
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: HostDown
|
||||
type: graphite
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.up),'last','=', 0)
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
@@ -1,12 +1,14 @@
|
||||
package datasource
|
||||
|
||||
import "context"
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
// Querier interface wraps Query method which
|
||||
// executes given query and returns list of Metrics
|
||||
// as result
|
||||
type Querier interface {
|
||||
Query(ctx context.Context, query string) ([]Metric, error)
|
||||
Query(ctx context.Context, query string, engine Type) ([]Metric, error)
|
||||
}
|
||||
|
||||
// Metric is the basic entity which should be return by datasource
|
||||
@@ -17,6 +19,34 @@ type Metric struct {
|
||||
Value float64
|
||||
}
|
||||
|
||||
// SetLabel adds or updates existing one label
|
||||
// by the given key and label
|
||||
func (m *Metric) SetLabel(key, value string) {
|
||||
for i, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
m.Labels[i].Value = value
|
||||
return
|
||||
}
|
||||
}
|
||||
m.AddLabel(key, value)
|
||||
}
|
||||
|
||||
// AddLabel appends the given label to the label set
|
||||
func (m *Metric) AddLabel(key, value string) {
|
||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
}
|
||||
|
||||
// Label returns the given label value.
|
||||
// If label is missing empty string will be returned
|
||||
func (m *Metric) Label(key string) string {
|
||||
for _, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
return l.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Label represents metric's label
|
||||
type Label struct {
|
||||
Name string
|
||||
|
||||
45
app/vmalert/datasource/init.go
Normal file
45
app/vmalert/datasource/init.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
|
||||
tlsCertFile = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
|
||||
tlsKeyFile = flag.String("datasource.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -datasource.url")
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -datasource.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", "Optional TLS server name to use for connections to -datasource.url. "+
|
||||
"By default the server name from -datasource.url is used")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, "Lookback defines how far to look into past when evaluating queries. "+
|
||||
"For example, if datasource.lookback=5m then param \"time\" with value now()-5m will be added to every query.")
|
||||
queryStep = flag.Duration("datasource.queryStep", 0, "queryStep defines how far a value can fallback to when evaluating queries. "+
|
||||
"For example, if datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query.")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, "Defines the number of idle (keep-alive connections) to configured datasource."+
|
||||
"Consider to set this value equal to the value: groups_total * group.concurrency. Too low value may result into high number of sockets in TIME_WAIT state.")
|
||||
)
|
||||
|
||||
// Init creates a Querier from provided flag values.
|
||||
func Init() (Querier, error) {
|
||||
if *addr == "" {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
tr.MaxIdleConns = *maxIdleConnections
|
||||
c := &http.Client{Transport: tr}
|
||||
return NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, *lookBack, *queryStep, c), nil
|
||||
}
|
||||
89
app/vmalert/datasource/type.go
Normal file
89
app/vmalert/datasource/type.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
const graphiteType = "graphite"
|
||||
const prometheusType = "prometheus"
|
||||
|
||||
// Type represents data source type
|
||||
type Type struct {
|
||||
name string
|
||||
}
|
||||
|
||||
// NewPrometheusType returns prometheus datasource type
|
||||
func NewPrometheusType() Type {
|
||||
return Type{name: prometheusType}
|
||||
}
|
||||
|
||||
// NewGraphiteType returns graphite datasource type
|
||||
func NewGraphiteType() Type {
|
||||
return Type{name: graphiteType}
|
||||
}
|
||||
|
||||
// NewRawType returns datasource type from raw string
|
||||
// without validation.
|
||||
func NewRawType(d string) Type {
|
||||
return Type{name: d}
|
||||
}
|
||||
|
||||
// Get returns datasource type
|
||||
func (t *Type) Get() string {
|
||||
return t.name
|
||||
}
|
||||
|
||||
// Set changes datasource type
|
||||
func (t *Type) Set(d Type) {
|
||||
t.name = d.name
|
||||
}
|
||||
|
||||
// String implements String interface with default value.
|
||||
func (t Type) String() string {
|
||||
if t.name == "" {
|
||||
return prometheusType
|
||||
}
|
||||
return t.name
|
||||
}
|
||||
|
||||
// ValidateExpr validates query expression with datasource ql.
|
||||
func (t *Type) ValidateExpr(expr string) error {
|
||||
switch t.name {
|
||||
case graphiteType:
|
||||
if _, err := graphiteql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "", prometheusType:
|
||||
if _, err := metricsql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q", t.name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t *Type) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
switch s {
|
||||
case "":
|
||||
s = prometheusType
|
||||
case graphiteType, prometheusType:
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, prometheusType, graphiteType)
|
||||
}
|
||||
t.name = s
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t Type) MarshalYAML() (interface{}, error) {
|
||||
return t.name, nil
|
||||
}
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type response struct {
|
||||
@@ -32,11 +32,11 @@ func (r response) metrics() ([]Metric, error) {
|
||||
for i, res := range r.Data.Result {
|
||||
f, err = strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %s", res, res.TV[1], err)
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
m.Labels = nil
|
||||
for k, v := range r.Data.Result[i].Labels {
|
||||
m.Labels = append(m.Labels, Label{Name: k, Value: v})
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
m.Timestamp = int64(res.TV[0].(float64))
|
||||
m.Value = f
|
||||
@@ -45,59 +45,151 @@ func (r response) metrics() ([]Metric, error) {
|
||||
return ms, nil
|
||||
}
|
||||
|
||||
const queryPath = "/api/v1/query?query="
|
||||
type graphiteResponse []graphiteResponseTarget
|
||||
|
||||
type graphiteResponseTarget struct {
|
||||
Target string `json:"target"`
|
||||
Tags map[string]string `json:"tags"`
|
||||
DataPoints [][2]float64 `json:"datapoints"`
|
||||
}
|
||||
|
||||
func (r graphiteResponse) metrics() []Metric {
|
||||
var ms []Metric
|
||||
for _, res := range r {
|
||||
if len(res.DataPoints) < 1 {
|
||||
continue
|
||||
}
|
||||
var m Metric
|
||||
// add only last value to the result.
|
||||
last := res.DataPoints[len(res.DataPoints)-1]
|
||||
m.Value = last[0]
|
||||
m.Timestamp = int64(last[1])
|
||||
for k, v := range res.Tags {
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
ms = append(ms, m)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
|
||||
// VMStorage represents vmstorage entity with ability to read and write metrics
|
||||
type VMStorage struct {
|
||||
c *http.Client
|
||||
queryURL string
|
||||
basicAuthUser, basicAuthPass string
|
||||
c *http.Client
|
||||
datasourceURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
lookBack time.Duration
|
||||
queryStep time.Duration
|
||||
}
|
||||
|
||||
const queryPath = "/api/v1/query"
|
||||
const graphitePath = "/render"
|
||||
|
||||
// NewVMStorage is a constructor for VMStorage
|
||||
func NewVMStorage(baseURL, basicAuthUser, basicAuthPass string, c *http.Client) *VMStorage {
|
||||
func NewVMStorage(baseURL, basicAuthUser, basicAuthPass string, lookBack time.Duration, queryStep time.Duration, c *http.Client) *VMStorage {
|
||||
return &VMStorage{
|
||||
c: c,
|
||||
basicAuthUser: basicAuthUser,
|
||||
basicAuthPass: basicAuthPass,
|
||||
queryURL: strings.TrimSuffix(baseURL, "/") + queryPath,
|
||||
datasourceURL: strings.TrimSuffix(baseURL, "/"),
|
||||
lookBack: lookBack,
|
||||
queryStep: queryStep,
|
||||
}
|
||||
}
|
||||
|
||||
// Query reads metrics from datasource by given query
|
||||
func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
const (
|
||||
statusSuccess, statusError, rtVector = "success", "error", "vector"
|
||||
)
|
||||
req, err := http.NewRequest("POST", s.queryURL+url.QueryEscape(query), nil)
|
||||
// Query reads metrics from datasource by given query and type
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, dataSourceType Type) ([]Metric, error) {
|
||||
switch dataSourceType.name {
|
||||
case "", prometheusType:
|
||||
return s.queryDataSource(ctx, query, s.setPrometheusReqParams, parsePrometheusResponse)
|
||||
case graphiteType:
|
||||
return s.queryDataSource(ctx, query, s.setGraphiteReqParams, parseGraphiteResponse)
|
||||
default:
|
||||
return nil, fmt.Errorf("engine not found: %q", dataSourceType)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *VMStorage) queryDataSource(
|
||||
ctx context.Context,
|
||||
query string,
|
||||
setReqParams func(r *http.Request, query string),
|
||||
processResponse func(r *http.Request, resp *http.Response,
|
||||
) ([]Metric, error)) ([]Metric, error) {
|
||||
req, err := http.NewRequest("POST", s.datasourceURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Content-Type", "application/json; charset=utf-8")
|
||||
if s.basicAuthPass != "" {
|
||||
req.SetBasicAuth(s.basicAuthUser, s.basicAuthPass)
|
||||
}
|
||||
setReqParams(req, query)
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s:%s", req.URL, err)
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", req.URL, err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("datasource returns unxeprected response code %d for %s with err %s. Reponse body %s", resp.StatusCode, req.URL, err, body)
|
||||
return nil, fmt.Errorf("datasource returns unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
|
||||
}
|
||||
return processResponse(req, resp)
|
||||
}
|
||||
|
||||
func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
|
||||
r.URL.Path += queryPath
|
||||
q := r.URL.Query()
|
||||
q.Set("query", query)
|
||||
if s.lookBack > 0 {
|
||||
lookBack := time.Now().Add(-s.lookBack)
|
||||
q.Set("time", fmt.Sprintf("%d", lookBack.Unix()))
|
||||
}
|
||||
if s.queryStep > 0 {
|
||||
q.Set("step", s.queryStep.String())
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
func (s *VMStorage) setGraphiteReqParams(r *http.Request, query string) {
|
||||
r.URL.Path += graphitePath
|
||||
q := r.URL.Query()
|
||||
q.Set("format", "json")
|
||||
q.Set("target", query)
|
||||
from := "-5min"
|
||||
if s.lookBack > 0 {
|
||||
lookBack := time.Now().Add(-s.lookBack)
|
||||
from = strconv.FormatInt(lookBack.Unix(), 10)
|
||||
}
|
||||
q.Set("from", from)
|
||||
q.Set("until", "now")
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
const (
|
||||
statusSuccess, statusError, rtVector = "success", "error", "vector"
|
||||
)
|
||||
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
r := &response{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing metrics for %s:%s", req.URL, err)
|
||||
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL, err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL, r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return nil, fmt.Errorf("unkown status:%s, Expected success or error ", r.Status)
|
||||
return nil, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
}
|
||||
if r.Data.ResultType != rtVector {
|
||||
return nil, fmt.Errorf("unkown restul type:%s. Expected vector", r.Data.ResultType)
|
||||
return nil, fmt.Errorf("unknown result type:%s. Expected vector", r.Data.ResultType)
|
||||
}
|
||||
return r.metrics()
|
||||
}
|
||||
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
r := &graphiteResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL, err)
|
||||
}
|
||||
return r.metrics(), nil
|
||||
}
|
||||
|
||||
@@ -4,7 +4,9 @@ import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -12,6 +14,7 @@ var (
|
||||
basicAuthName = "foo"
|
||||
basicAuthPass = "bar"
|
||||
query = "vm_rows"
|
||||
queryRender = "constantLine(10)"
|
||||
)
|
||||
|
||||
func TestVMSelectQuery(t *testing.T) {
|
||||
@@ -20,6 +23,13 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 7:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
@@ -29,7 +39,14 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
t.Errorf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != query {
|
||||
t.Errorf("exptected %s in query param, got %s", query, r.URL.Query().Get("query"))
|
||||
t.Errorf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
|
||||
}
|
||||
timeParam := r.URL.Query().Get("time")
|
||||
if timeParam == "" {
|
||||
t.Errorf("expected 'time' in query param, got nil instead")
|
||||
}
|
||||
if _, err := strconv.ParseInt(timeParam, 10, 64); err != nil {
|
||||
t.Errorf("failed to parse 'time' query param: %s", err)
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
@@ -52,31 +69,31 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
am := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, srv.Client())
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
am := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, time.Minute, 0, srv.Client())
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected connection error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected invalid response status error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected response body error got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected error status got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
t.Fatalf("expected unkown status got nil")
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected unknown status got nil")
|
||||
}
|
||||
if _, err := am.Query(ctx, query); err == nil {
|
||||
if _, err := am.Query(ctx, query, NewPrometheusType()); err == nil {
|
||||
t.Fatalf("expected non-vector resultType error got nil")
|
||||
}
|
||||
m, err := am.Query(ctx, query)
|
||||
m, err := am.Query(ctx, query, NewPrometheusType())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("exptected 1 metric got %d in %+v", len(m), m)
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := Metric{
|
||||
Labels: []Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
@@ -89,5 +106,22 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
m[0].Labels[0].Name != expected.Labels[0].Name {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
|
||||
m, err = am.Query(ctx, queryRender, NewGraphiteType())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected = Metric{
|
||||
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
|
||||
Timestamp: 1611758403,
|
||||
Value: 10,
|
||||
}
|
||||
if m[0].Timestamp != expected.Timestamp &&
|
||||
m[0].Value != expected.Value &&
|
||||
m[0].Labels[0].Value != expected.Labels[0].Value &&
|
||||
m[0].Labels[0].Name != expected.Labels[0].Name {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -21,26 +22,46 @@ type Group struct {
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type datasource.Type
|
||||
Interval time.Duration
|
||||
Concurrency int
|
||||
Checksum string
|
||||
|
||||
doneCh chan struct{}
|
||||
finishedCh chan struct{}
|
||||
// channel accepts new Group obj
|
||||
// which supposed to update current group
|
||||
updateCh chan *Group
|
||||
|
||||
metrics *groupMetrics
|
||||
}
|
||||
|
||||
func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
type groupMetrics struct {
|
||||
iterationTotal *counter
|
||||
iterationDuration *summary
|
||||
}
|
||||
|
||||
func newGroupMetrics(name, file string) *groupMetrics {
|
||||
m := &groupMetrics{}
|
||||
labels := fmt.Sprintf(`group=%q, file=%q`, name, file)
|
||||
m.iterationTotal = getOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
|
||||
m.iterationDuration = getOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
|
||||
return m
|
||||
}
|
||||
|
||||
func newGroup(cfg config.Group, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
g := &Group{
|
||||
Type: cfg.Type,
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval,
|
||||
Concurrency: cfg.Concurrency,
|
||||
Checksum: cfg.Checksum,
|
||||
doneCh: make(chan struct{}),
|
||||
finishedCh: make(chan struct{}),
|
||||
updateCh: make(chan *Group),
|
||||
}
|
||||
g.metrics = newGroupMetrics(g.Name, g.File)
|
||||
if g.Interval == 0 {
|
||||
g.Interval = defaultInterval
|
||||
}
|
||||
@@ -49,6 +70,17 @@ func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
}
|
||||
rules := make([]Rule, len(cfg.Rules))
|
||||
for i, r := range cfg.Rules {
|
||||
// override rule labels with external labels
|
||||
for k, v := range labels {
|
||||
if prevV, ok := r.Labels[k]; ok {
|
||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||
k, prevV, g.Name, r.Name(), k, v)
|
||||
}
|
||||
if r.Labels == nil {
|
||||
r.Labels = map[string]string{}
|
||||
}
|
||||
r.Labels[k] = v
|
||||
}
|
||||
rules[i] = g.newRule(r)
|
||||
}
|
||||
g.Rules = rules
|
||||
@@ -57,9 +89,9 @@ func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
|
||||
func (g *Group) newRule(rule config.Rule) Rule {
|
||||
if rule.Alert != "" {
|
||||
return newAlertingRule(g.ID(), rule)
|
||||
return newAlertingRule(g, rule)
|
||||
}
|
||||
return newRecordingRule(g.ID(), rule)
|
||||
return newRecordingRule(g, rule)
|
||||
}
|
||||
|
||||
// ID return unique group ID that consists of
|
||||
@@ -69,11 +101,12 @@ func (g *Group) ID() uint64 {
|
||||
hash.Write([]byte(g.File))
|
||||
hash.Write([]byte("\xff"))
|
||||
hash.Write([]byte(g.Name))
|
||||
hash.Write([]byte(g.Type.Get()))
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
// Restore restores alerts state for group rules
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
for _, rule := range g.Rules {
|
||||
rr, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
@@ -82,8 +115,8 @@ func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time
|
||||
if rr.For < 1 {
|
||||
continue
|
||||
}
|
||||
if err := rr.Restore(ctx, q, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %s", rule, err)
|
||||
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -105,6 +138,7 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
if !ok {
|
||||
// old rule is not present in the new list
|
||||
// so we mark it for removing
|
||||
g.Rules[i].Close()
|
||||
g.Rules[i] = nil
|
||||
continue
|
||||
}
|
||||
@@ -126,25 +160,17 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
for _, nr := range rulesRegistry {
|
||||
newRules = append(newRules, nr)
|
||||
}
|
||||
g.Type = newGroup.Type
|
||||
g.Concurrency = newGroup.Concurrency
|
||||
g.Checksum = newGroup.Checksum
|
||||
g.Rules = newRules
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
|
||||
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
|
||||
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
|
||||
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
|
||||
|
||||
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (g *Group) close() {
|
||||
@@ -153,12 +179,41 @@ func (g *Group) close() {
|
||||
}
|
||||
close(g.doneCh)
|
||||
<-g.finishedCh
|
||||
|
||||
metrics.UnregisterMetric(g.metrics.iterationDuration.name)
|
||||
metrics.UnregisterMetric(g.metrics.iterationTotal.name)
|
||||
for _, rule := range g.Rules {
|
||||
rule.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifier.Notifier, rw *remotewrite.Client) {
|
||||
var skipRandSleepOnGroupStart bool
|
||||
|
||||
func (g *Group) start(ctx context.Context, querier datasource.Querier, nts []notifier.Notifier, rw *remotewrite.Client) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
|
||||
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
randSleep := uint64(float64(g.Interval) * (float64(uint32(g.ID())) / (1 << 32)))
|
||||
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += uint64(g.Interval)
|
||||
}
|
||||
randSleep -= sleepOffset
|
||||
sleepTimer := time.NewTimer(time.Duration(randSleep))
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-g.doneCh:
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-sleepTimer.C:
|
||||
}
|
||||
}
|
||||
|
||||
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
e := &executor{querier, nr, rw}
|
||||
e := &executor{querier, nts, rw}
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
@@ -185,7 +240,7 @@ func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifi
|
||||
g.mu.Unlock()
|
||||
logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
case <-t.C:
|
||||
iterationTotal.Inc()
|
||||
g.metrics.iterationTotal.Inc()
|
||||
iterationStart := time.Now()
|
||||
|
||||
errs := e.execConcurrently(ctx, g.Rules, g.Concurrency, g.Interval)
|
||||
@@ -195,15 +250,15 @@ func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifi
|
||||
}
|
||||
}
|
||||
|
||||
iterationDuration.UpdateDuration(iterationStart)
|
||||
g.metrics.iterationDuration.UpdateDuration(iterationStart)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type executor struct {
|
||||
querier datasource.Querier
|
||||
notifier notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
querier datasource.Querier
|
||||
notifiers []notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
}
|
||||
|
||||
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurrency int, interval time.Duration) chan error {
|
||||
@@ -240,6 +295,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurren
|
||||
return res
|
||||
}
|
||||
|
||||
var (
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, interval time.Duration) error {
|
||||
execTotal.Inc()
|
||||
execStart := time.Now()
|
||||
@@ -250,15 +313,14 @@ func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, inter
|
||||
tss, err := rule.Exec(ctx, e.querier, returnSeries)
|
||||
if err != nil {
|
||||
execErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to execute: %s", rule, err)
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
||||
}
|
||||
|
||||
if len(tss) > 0 && e.rw != nil {
|
||||
remoteWriteSent.Add(len(tss))
|
||||
for _, ts := range tss {
|
||||
if err := e.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
return fmt.Errorf("rule %q: remote write failure: %s", rule, err)
|
||||
return fmt.Errorf("rule %q: remote write failure: %w", rule, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -286,10 +348,14 @@ func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, inter
|
||||
if len(alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
alertsSent.Add(len(alerts))
|
||||
if err := e.notifier.Send(ctx, alerts); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to send alerts: %s", rule, err)
|
||||
errGr := new(utils.ErrGroup)
|
||||
for _, nt := range e.notifiers {
|
||||
if err := nt.Send(ctx, alerts); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts: %w", rule, err))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return errGr.Err()
|
||||
}
|
||||
|
||||
@@ -10,6 +10,12 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// Disable rand sleep on group start during tests in order to speed up test execution.
|
||||
// Rand sleep is needed only in prod code.
|
||||
skipRandSleepOnGroupStart = true
|
||||
}
|
||||
|
||||
func TestUpdateWith(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
@@ -26,7 +32,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
[]config.Rule{{
|
||||
Alert: "foo",
|
||||
Expr: "up > 0",
|
||||
For: time.Second,
|
||||
For: config.NewPromDuration(time.Second),
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
@@ -38,7 +44,7 @@ func TestUpdateWith(t *testing.T) {
|
||||
[]config.Rule{{
|
||||
Alert: "foo",
|
||||
Expr: "up > 10",
|
||||
For: time.Second,
|
||||
For: config.NewPromDuration(time.Second),
|
||||
Labels: map[string]string{
|
||||
"baz": "bar",
|
||||
},
|
||||
@@ -150,7 +156,7 @@ func TestGroupStart(t *testing.T) {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
const evalInterval = time.Millisecond
|
||||
g := newGroup(groups[0], evalInterval)
|
||||
g := newGroup(groups[0], evalInterval, map[string]string{"cluster": "east-1"})
|
||||
g.Concurrency = 2
|
||||
|
||||
fn := &fakeNotifier{}
|
||||
@@ -161,25 +167,35 @@ func TestGroupStart(t *testing.T) {
|
||||
m2 := metricWithLabels(t, "instance", inst2, "job", job)
|
||||
|
||||
r := g.Rules[0].(*AlertingRule)
|
||||
alert1, err := r.newAlert(m1, time.Now())
|
||||
alert1, err := r.newAlert(m1, time.Now(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
alert1.State = notifier.StateFiring
|
||||
// add external label
|
||||
alert1.Labels["cluster"] = "east-1"
|
||||
// add rule labels - see config/testdata/rules1-good.rules
|
||||
alert1.Labels["label"] = "bar"
|
||||
alert1.Labels["host"] = inst1
|
||||
alert1.ID = hash(m1)
|
||||
|
||||
alert2, err := r.newAlert(m2, time.Now())
|
||||
alert2, err := r.newAlert(m2, time.Now(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
alert2.State = notifier.StateFiring
|
||||
// add external label
|
||||
alert2.Labels["cluster"] = "east-1"
|
||||
// add rule labels - see config/testdata/rules1-good.rules
|
||||
alert2.Labels["label"] = "bar"
|
||||
alert2.Labels["host"] = inst2
|
||||
alert2.ID = hash(m2)
|
||||
|
||||
finished := make(chan struct{})
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.start(context.Background(), fs, fn, nil)
|
||||
g.start(context.Background(), fs, []notifier.Notifier{fn}, nil)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ datasource.Type) ([]datasource.Metric, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
|
||||
@@ -2,20 +2,18 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
@@ -24,7 +22,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -32,69 +29,26 @@ var (
|
||||
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.`)
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
|
||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
|
||||
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
datasourceTLSInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
|
||||
datasourceTLSCertFile = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
|
||||
datasourceTLSKeyFile = flag.String("datasource.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -datasource.url")
|
||||
datasourceTLSCAFile = flag.String("datasource.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -datasource.url. "+
|
||||
"By default system CA is used")
|
||||
datasourceTLSServerName = flag.String("datasource.tlsServerName", "", "Optional TLS server name to use for connections to -datasource.url. "+
|
||||
"By default the server name from -datasource.url is used")
|
||||
|
||||
remoteWriteURL = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
" and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
|
||||
remoteWriteUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
remoteWritePassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
remoteWriteMaxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
remoteWriteMaxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
|
||||
remoteWriteConcurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote storage")
|
||||
remoteWriteTLSInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
remoteWriteTLSServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
|
||||
remoteReadURL = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
remoteReadUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
remoteReadPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
remoteReadTLSInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
|
||||
remoteReadTLSCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
|
||||
remoteReadTLSKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
|
||||
remoteReadTLSCAFile = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
|
||||
"By default system CA is used")
|
||||
remoteReadTLSServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
|
||||
"By default the server name from -remoteRead.url is used")
|
||||
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
notifierURL = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
notifierTLSInsecureSkipVerify = flag.Bool("notifier.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -notifier.url")
|
||||
notifierTLSCertFile = flag.String("notifier.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
notifierTLSKeyFile = flag.String("notifier.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
notifierTLSCAFile = flag.String("notifier.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
notifierTLSServerName = flag.String("notifier.tlsServerName", "", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
|
||||
externalLabels = flagutil.NewArray("external.label", "Optional label in the form 'name=value' to add to all generated recording rules and alerts. "+
|
||||
"Pass multiple -label flags in order to add multiple label sets.")
|
||||
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The `-rule` flag must be specified.")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -104,64 +58,24 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
checkFlags()
|
||||
|
||||
if *dryRun {
|
||||
u, _ := url.Parse("https://victoriametrics.com/")
|
||||
notifier.InitTemplateFunc(u)
|
||||
groups, err := config.Parse(*rulePath, true, true)
|
||||
if err != nil {
|
||||
logger.Fatalf(err.Error())
|
||||
}
|
||||
if len(groups) == 0 {
|
||||
logger.Fatalf("No rules for validation. Please specify path to file(s) with alerting and/or recording rules using `-rule` flag")
|
||||
}
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
manager, err := newManager(ctx)
|
||||
if err != nil {
|
||||
logger.Fatalf("can not get external url: %s ", err)
|
||||
logger.Fatalf("failed to init: %s", err)
|
||||
}
|
||||
notifier.InitTemplateFunc(eu)
|
||||
aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
if err != nil {
|
||||
logger.Fatalf("URL generator error: %s", err)
|
||||
}
|
||||
|
||||
dst, err := getTransport(datasourceURL, datasourceTLSCertFile, datasourceTLSKeyFile, datasourceTLSCAFile, datasourceTLSServerName, datasourceTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create datasource transport: %s", err)
|
||||
}
|
||||
|
||||
nt, err := getTransport(notifierURL, notifierTLSCertFile, notifierTLSKeyFile, notifierTLSCAFile, notifierTLSServerName, notifierTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create notifier transport: %s", err)
|
||||
}
|
||||
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{Transport: dst}),
|
||||
notifier: notifier.NewAlertManager(*notifierURL, aug, &http.Client{Transport: nt}),
|
||||
}
|
||||
if *remoteWriteURL != "" {
|
||||
t, err := getTransport(remoteWriteURL, remoteWriteTLSCertFile, remoteWriteTLSKeyFile, remoteWriteTLSCAFile, remoteWriteTLSServerName, remoteWriteTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create remoteWrite transport: %s", err)
|
||||
}
|
||||
|
||||
c, err := remotewrite.NewClient(ctx, remotewrite.Config{
|
||||
Addr: *remoteWriteURL,
|
||||
Concurrency: *remoteWriteConcurrency,
|
||||
MaxQueueSize: *remoteWriteMaxQueueSize,
|
||||
MaxBatchSize: *remoteWriteMaxBatchSize,
|
||||
FlushInterval: *evaluationInterval,
|
||||
BasicAuthUser: *remoteWriteUsername,
|
||||
BasicAuthPass: *remoteWritePassword,
|
||||
Transport: t,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init remotewrite client: %s", err)
|
||||
}
|
||||
manager.rw = c
|
||||
}
|
||||
|
||||
if *remoteReadURL != "" {
|
||||
t, err := getTransport(remoteReadURL, remoteReadTLSCertFile, remoteReadTLSKeyFile, remoteReadTLSCAFile, remoteReadTLSServerName, remoteReadTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create remoteRead transport: %s", err)
|
||||
}
|
||||
|
||||
manager.rr = datasource.NewVMStorage(*remoteReadURL, *remoteReadUsername, *remoteReadPassword, &http.Client{Transport: t})
|
||||
}
|
||||
|
||||
if err := manager.start(ctx, *rulePath, *validateTemplates, *validateExpressions); err != nil {
|
||||
logger.Fatalf("failed to start: %s", err)
|
||||
}
|
||||
@@ -206,6 +120,56 @@ var (
|
||||
configTimestamp = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
func newManager(ctx context.Context) (*manager, error) {
|
||||
q, err := datasource.Init()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init datasource: %w", err)
|
||||
}
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init `external.url`: %w", err)
|
||||
}
|
||||
notifier.InitTemplateFunc(eu)
|
||||
aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init `external.alert.source`: %w", err)
|
||||
}
|
||||
nts, err := notifier.Init(aug)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||
}
|
||||
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
querier: q,
|
||||
notifiers: nts,
|
||||
labels: map[string]string{},
|
||||
}
|
||||
rw, err := remotewrite.Init(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
|
||||
}
|
||||
manager.rw = rw
|
||||
|
||||
rr, err := remoteread.Init()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteRead: %w", err)
|
||||
}
|
||||
manager.rr = rr
|
||||
|
||||
for _, s := range *externalLabels {
|
||||
if len(s) == 0 {
|
||||
continue
|
||||
}
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
return nil, fmt.Errorf("missing '=' in `-label`. It must contain label in the form `name=value`; got %q", s)
|
||||
}
|
||||
manager.labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
return manager, nil
|
||||
}
|
||||
|
||||
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
if externalURL != "" {
|
||||
return url.Parse(externalURL)
|
||||
@@ -235,14 +199,14 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
if err := notifier.ValidateTemplates(map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("error validating source template %s:%w", externalAlertSource, err)
|
||||
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}
|
||||
return func(alert notifier.Alert) string {
|
||||
templated, err := alert.ExecTemplate(m)
|
||||
templated, err := alert.ExecTemplate(nil, m)
|
||||
if err != nil {
|
||||
logger.Errorf("can not exec source template %s", err)
|
||||
}
|
||||
@@ -250,76 +214,11 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getTLSConfig(certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*tls.Config, error) {
|
||||
var certs []tls.Certificate
|
||||
if *certFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(*certFile, *keyFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %s", *certFile, *keyFile, err)
|
||||
}
|
||||
|
||||
certs = []tls.Certificate{cert}
|
||||
}
|
||||
|
||||
var rootCAs *x509.CertPool
|
||||
if *CAFile != "" {
|
||||
pem, err := ioutil.ReadFile(*CAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read `ca_file` %q: %s", *CAFile, err)
|
||||
}
|
||||
|
||||
rootCAs = x509.NewCertPool()
|
||||
if !rootCAs.AppendCertsFromPEM(pem) {
|
||||
return nil, fmt.Errorf("cannot parse data from `ca_file` %q", *CAFile)
|
||||
}
|
||||
}
|
||||
|
||||
return &tls.Config{
|
||||
Certificates: certs,
|
||||
InsecureSkipVerify: *insecureSkipVerify,
|
||||
RootCAs: rootCAs,
|
||||
ServerName: *serverName,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getTransport(URL, certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*http.Transport, error) {
|
||||
var u fasthttp.URI
|
||||
u.Update(*URL)
|
||||
|
||||
var t *http.Transport
|
||||
if string(u.Scheme()) == "https" {
|
||||
t = http.DefaultTransport.(*http.Transport).Clone()
|
||||
|
||||
tlsCfg, err := getTLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t.TLSClientConfig = tlsCfg
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func checkFlags() {
|
||||
if *notifierURL == "" {
|
||||
flag.PrintDefaults()
|
||||
logger.Fatalf("notifier.url is empty")
|
||||
}
|
||||
if *datasourceURL == "" {
|
||||
flag.PrintDefaults()
|
||||
logger.Fatalf("datasource.url is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmalert processes alerts and recording rules.
|
||||
|
||||
See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/README.md .
|
||||
See the docs at https://victoriametrics.github.io/vmalert.html .
|
||||
`
|
||||
|
||||
f := flag.CommandLine.Output()
|
||||
fmt.Fprintf(f, "%s\n", s)
|
||||
flag.PrintDefaults()
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
}
|
||||
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
|
||||
if err == nil {
|
||||
t.Errorf("exptected tempalte validation error got nil")
|
||||
t.Errorf("expected tempalte validation error got nil")
|
||||
}
|
||||
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}", true)
|
||||
if err != nil {
|
||||
@@ -51,55 +51,3 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetTLSConfig(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
serverName = "test"
|
||||
insecureSkipVerify = true
|
||||
tlsCfg, err := getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tlsCfg == nil {
|
||||
t.Errorf("expected tlsConfig to be set, got nil")
|
||||
}
|
||||
if tlsCfg.ServerName != serverName {
|
||||
t.Errorf("unexpected ServerName, want %s, got %s", serverName, tlsCfg.ServerName)
|
||||
}
|
||||
if tlsCfg.InsecureSkipVerify != insecureSkipVerify {
|
||||
t.Errorf("unexpected InsecureSkipVerify, want %v, got %v", insecureSkipVerify, tlsCfg.InsecureSkipVerify)
|
||||
}
|
||||
certFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected keypair error, got nil")
|
||||
}
|
||||
certFile = ""
|
||||
CAFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected read error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetTransport(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
URL := "http://victoriametrics.com"
|
||||
tr, err := getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr != nil {
|
||||
t.Errorf("expected Transport to be nil, got %v", tr)
|
||||
}
|
||||
URL = "https://victoriametrics.com"
|
||||
tr, err = getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr.TLSClientConfig == nil {
|
||||
t.Errorf("expected TLSClientConfig to be set, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,13 +15,14 @@ import (
|
||||
|
||||
// manager controls group states
|
||||
type manager struct {
|
||||
storage datasource.Querier
|
||||
notifier notifier.Notifier
|
||||
querier datasource.Querier
|
||||
notifiers []notifier.Notifier
|
||||
|
||||
rw *remotewrite.Client
|
||||
rr datasource.Querier
|
||||
|
||||
wg sync.WaitGroup
|
||||
wg sync.WaitGroup
|
||||
labels map[string]string
|
||||
|
||||
groupsMu sync.RWMutex
|
||||
groups map[uint64]*Group
|
||||
@@ -64,7 +65,7 @@ func (m *manager) close() {
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) {
|
||||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack)
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
|
||||
}
|
||||
@@ -73,7 +74,7 @@ func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) {
|
||||
m.wg.Add(1)
|
||||
id := group.ID()
|
||||
go func() {
|
||||
group.start(ctx, m.storage, m.notifier, m.rw)
|
||||
group.start(ctx, m.querier, m.notifiers, m.rw)
|
||||
m.wg.Done()
|
||||
}()
|
||||
m.groups[id] = group
|
||||
@@ -83,42 +84,65 @@ func (m *manager) update(ctx context.Context, path []string, validateTpl, valida
|
||||
logger.Infof("reading rules configuration file from %q", strings.Join(path, ";"))
|
||||
groupsCfg, err := config.Parse(path, validateTpl, validateExpr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse configuration file: %s", err)
|
||||
return fmt.Errorf("cannot parse configuration file: %w", err)
|
||||
}
|
||||
|
||||
groupsRegistry := make(map[uint64]*Group)
|
||||
for _, cfg := range groupsCfg {
|
||||
ng := newGroup(cfg, *evaluationInterval)
|
||||
ng := newGroup(cfg, *evaluationInterval, m.labels)
|
||||
groupsRegistry[ng.ID()] = ng
|
||||
}
|
||||
|
||||
type updateItem struct {
|
||||
old *Group
|
||||
new *Group
|
||||
}
|
||||
var toUpdate []updateItem
|
||||
|
||||
m.groupsMu.Lock()
|
||||
for _, og := range m.groups {
|
||||
ng, ok := groupsRegistry[og.ID()]
|
||||
if !ok {
|
||||
// old group is not present in new list
|
||||
// and must be stopped and deleted
|
||||
// old group is not present in new list,
|
||||
// so must be stopped and deleted
|
||||
og.close()
|
||||
delete(m.groups, og.ID())
|
||||
og = nil
|
||||
continue
|
||||
}
|
||||
og.updateCh <- ng
|
||||
delete(groupsRegistry, ng.ID())
|
||||
if og.Checksum != ng.Checksum {
|
||||
toUpdate = append(toUpdate, updateItem{old: og, new: ng})
|
||||
}
|
||||
}
|
||||
|
||||
for _, ng := range groupsRegistry {
|
||||
m.startGroup(ctx, ng, restore)
|
||||
}
|
||||
m.groupsMu.Unlock()
|
||||
|
||||
if len(toUpdate) > 0 {
|
||||
var wg sync.WaitGroup
|
||||
for _, item := range toUpdate {
|
||||
wg.Add(1)
|
||||
go func(old *Group, new *Group) {
|
||||
old.updateCh <- new
|
||||
wg.Done()
|
||||
}(item.old, item.new)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Group) toAPI() APIGroup {
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
|
||||
ag := APIGroup{
|
||||
// encode as strings to avoid rounding
|
||||
// encode as string to avoid rounding
|
||||
ID: fmt.Sprintf("%d", g.ID()),
|
||||
Name: g.Name,
|
||||
Type: g.Type.String(),
|
||||
File: g.File,
|
||||
Interval: g.Interval.String(),
|
||||
Concurrency: g.Concurrency,
|
||||
|
||||
@@ -5,11 +5,12 @@ import (
|
||||
"math/rand"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
@@ -19,16 +20,15 @@ func TestMain(m *testing.M) {
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestManagerUpdateError(t *testing.T) {
|
||||
// TestManagerEmptyRulesDir tests
|
||||
// successful cases of
|
||||
// starting with empty rules folder
|
||||
func TestManagerEmptyRulesDir(t *testing.T) {
|
||||
m := &manager{groups: make(map[uint64]*Group)}
|
||||
path := []string{"foo/bar"}
|
||||
err := m.update(context.Background(), path, true, true, false)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to have err; got nil instead")
|
||||
}
|
||||
expErr := "no groups found"
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to got err %s; got %s", expErr, err)
|
||||
if err != nil {
|
||||
t.Fatalf("expected to load succesfully with empty rules dir; got err instead: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,9 +37,9 @@ func TestManagerUpdateError(t *testing.T) {
|
||||
// Should be executed with -race flag
|
||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: &fakeQuerier{},
|
||||
notifier: &fakeNotifier{},
|
||||
groups: make(map[uint64]*Group),
|
||||
querier: &fakeQuerier{},
|
||||
notifiers: []notifier.Notifier{&fakeNotifier{}},
|
||||
}
|
||||
paths := []string{
|
||||
"config/testdata/dir/rules0-good.rules",
|
||||
@@ -108,6 +108,18 @@ func TestManagerUpdate(t *testing.T) {
|
||||
Name: "ExampleAlertAlwaysFiring",
|
||||
Expr: "sum by(job) (up == 1)",
|
||||
}
|
||||
ExampleAlertGraphite = &AlertingRule{
|
||||
Name: "up graphite",
|
||||
Expr: "filterSeries(time('host.1',20),'>','0')",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
For: defaultEvalInterval,
|
||||
}
|
||||
ExampleAlertGraphite2 = &AlertingRule{
|
||||
Name: "up",
|
||||
Expr: "filterSeries(time('host.2',20),'>','0')",
|
||||
Type: datasource.NewGraphiteType(),
|
||||
For: defaultEvalInterval,
|
||||
}
|
||||
)
|
||||
|
||||
testCases := []struct {
|
||||
@@ -124,6 +136,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/dir/rules1-good.rules",
|
||||
Name: "duplicatedGroupDiffFiles",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{
|
||||
&AlertingRule{
|
||||
@@ -148,12 +161,14 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{VMRows},
|
||||
Interval: defaultEvalInterval,
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
@@ -168,23 +183,66 @@ func TestManagerUpdate(t *testing.T) {
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{VMRows},
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Name: "TestGroup",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update empty dir rules from 0 to 2 groups",
|
||||
initPath: "config/testdata/empty/*",
|
||||
updatePath: "config/testdata/rules0-good.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert",
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Interval: defaultEvalInterval,
|
||||
Rules: []Rule{VMRows},
|
||||
},
|
||||
{
|
||||
File: "config/testdata/rules0-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewPrometheusType(),
|
||||
Name: "TestGroup", Rules: []Rule{
|
||||
Conns,
|
||||
ExampleAlertAlwaysFiring,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update prometheus to graphite type",
|
||||
initPath: "config/testdata/dir/rules-update0-good.rules",
|
||||
updatePath: "config/testdata/dir/rules-update1-good.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "config/testdata/dir/rules-update1-good.rules",
|
||||
Interval: defaultEvalInterval,
|
||||
Type: datasource.NewGraphiteType(),
|
||||
Name: "TestUpdateGroup",
|
||||
Rules: []Rule{
|
||||
ExampleAlertGraphite2,
|
||||
ExampleAlertGraphite,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.TODO())
|
||||
m := &manager{groups: make(map[uint64]*Group), storage: &fakeQuerier{}}
|
||||
m := &manager{groups: make(map[uint64]*Group), querier: &fakeQuerier{}}
|
||||
path := []string{tc.initPath}
|
||||
if err := m.update(ctx, path, true, true, false); err != nil {
|
||||
t.Fatalf("failed to complete initial rules update: %s", err)
|
||||
|
||||
39
app/vmalert/metrics.go
Normal file
39
app/vmalert/metrics.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package main
|
||||
|
||||
import "github.com/VictoriaMetrics/metrics"
|
||||
|
||||
type gauge struct {
|
||||
name string
|
||||
*metrics.Gauge
|
||||
}
|
||||
|
||||
func getOrCreateGauge(name string, f func() float64) *gauge {
|
||||
return &gauge{
|
||||
name: name,
|
||||
Gauge: metrics.GetOrCreateGauge(name, f),
|
||||
}
|
||||
}
|
||||
|
||||
type counter struct {
|
||||
name string
|
||||
*metrics.Counter
|
||||
}
|
||||
|
||||
func getOrCreateCounter(name string) *counter {
|
||||
return &counter{
|
||||
name: name,
|
||||
Counter: metrics.GetOrCreateCounter(name),
|
||||
}
|
||||
}
|
||||
|
||||
type summary struct {
|
||||
name string
|
||||
*metrics.Summary
|
||||
}
|
||||
|
||||
func getOrCreateSummary(name string) *summary {
|
||||
return &summary{
|
||||
name: name,
|
||||
Summary: metrics.GetOrCreateSummary(name),
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
// Alert the triggered alert
|
||||
@@ -50,7 +52,8 @@ func (as AlertState) String() string {
|
||||
return "inactive"
|
||||
}
|
||||
|
||||
type alertTplData struct {
|
||||
// AlertTplData is used to execute templating
|
||||
type AlertTplData struct {
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
@@ -58,45 +61,53 @@ type alertTplData struct {
|
||||
|
||||
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Expr }}`
|
||||
|
||||
// ExecTemplate executes the Alert template for give
|
||||
// ExecTemplate executes the Alert template for given
|
||||
// map of annotations.
|
||||
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
|
||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
||||
return templateAnnotations(annotations, tplHeader, tplData)
|
||||
// Every alert could have a different datasource, so function
|
||||
// requires a queryFunction as an argument.
|
||||
func (a *Alert) ExecTemplate(q QueryFn, annotations map[string]string) (map[string]string, error) {
|
||||
tplData := AlertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
||||
return templateAnnotations(annotations, tplData, funcsWithQuery(q))
|
||||
}
|
||||
|
||||
// ExecTemplate executes the given template for given annotations map.
|
||||
func ExecTemplate(q QueryFn, annotations map[string]string, tpl AlertTplData) (map[string]string, error) {
|
||||
return templateAnnotations(annotations, tpl, funcsWithQuery(q))
|
||||
}
|
||||
|
||||
// ValidateTemplates validate annotations for possible template error, uses empty data for template population
|
||||
func ValidateTemplates(annotations map[string]string) error {
|
||||
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
|
||||
_, err := templateAnnotations(annotations, AlertTplData{
|
||||
Labels: map[string]string{},
|
||||
Value: 0,
|
||||
})
|
||||
}, tmplFunc)
|
||||
return err
|
||||
}
|
||||
|
||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
||||
func templateAnnotations(annotations map[string]string, data AlertTplData, funcs template.FuncMap) (map[string]string, error) {
|
||||
var builder strings.Builder
|
||||
var buf bytes.Buffer
|
||||
eg := errGroup{}
|
||||
eg := new(utils.ErrGroup)
|
||||
r := make(map[string]string, len(annotations))
|
||||
for key, text := range annotations {
|
||||
r[key] = text
|
||||
buf.Reset()
|
||||
builder.Reset()
|
||||
builder.Grow(len(header) + len(text))
|
||||
builder.WriteString(header)
|
||||
builder.Grow(len(tplHeader) + len(text))
|
||||
builder.WriteString(tplHeader)
|
||||
builder.WriteString(text)
|
||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
||||
eg.errs = append(eg.errs, fmt.Sprintf("key %q, template %q: %s", key, text, err))
|
||||
if err := templateAnnotation(&buf, builder.String(), data, funcs); err != nil {
|
||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
}
|
||||
return r, eg.err()
|
||||
return r, eg.Err()
|
||||
}
|
||||
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
||||
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
|
||||
func templateAnnotation(dst io.Writer, text string, data AlertTplData, funcs template.FuncMap) error {
|
||||
t := template.New("").Funcs(funcs).Option("missingkey=zero")
|
||||
tpl, err := t.Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing annotation: %w", err)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ package notifier
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
func TestAlert_ExecTemplate(t *testing.T) {
|
||||
@@ -60,11 +62,41 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "query",
|
||||
alert: &Alert{Expr: `vm_rows{"label"="bar"}>0`},
|
||||
annotations: map[string]string{
|
||||
"summary": `{{ query "foo" | first | value }}`,
|
||||
"desc": `{{ range query "bar" }}{{ . | label "foo" }} {{ . | value }};{{ end }}`,
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"summary": "1",
|
||||
"desc": "bar 1;garply 2;",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
qFn := func(q string) ([]datasource.Metric, error) {
|
||||
return []datasource.Metric{
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "baz", Value: "qux"},
|
||||
},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "foo", Value: "garply"},
|
||||
{Name: "baz", Value: "fred"},
|
||||
},
|
||||
Value: 2,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
tpl, err := tc.alert.ExecTemplate(tc.annotations)
|
||||
tpl, err := tc.alert.ExecTemplate(qFn, tc.annotations)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -12,9 +12,11 @@ import (
|
||||
// AlertManager represents integration provider with Prometheus alert manager
|
||||
// https://github.com/prometheus/alertmanager
|
||||
type AlertManager struct {
|
||||
alertURL string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
alertURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
@@ -26,8 +28,11 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Content-Type", "application/json; charset=utf-8")
|
||||
req = req.WithContext(ctx)
|
||||
if am.basicAuthPass != "" {
|
||||
req.SetBasicAuth(am.basicAuthUser, am.basicAuthPass)
|
||||
}
|
||||
resp, err := am.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -38,7 +43,7 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read response from %q: %s", am.alertURL, err)
|
||||
return fmt.Errorf("failed to read response from %q: %w", am.alertURL, err)
|
||||
}
|
||||
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.alertURL, string(body))
|
||||
}
|
||||
@@ -51,10 +56,13 @@ type AlertURLGenerator func(Alert) string
|
||||
const alertManagerPath = "/api/v2/alerts"
|
||||
|
||||
// NewAlertManager is a constructor for AlertManager
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
func NewAlertManager(alertManagerURL, user, pass string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
addr := strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath
|
||||
return &AlertManager{
|
||||
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
alertURL: addr,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
basicAuthUser: user,
|
||||
basicAuthPass: pass,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,12 +11,21 @@ import (
|
||||
)
|
||||
|
||||
func TestAlertManager_Send(t *testing.T) {
|
||||
const baUser, baPass = "foo", "bar"
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
user, pass, ok := r.BasicAuth()
|
||||
if !ok {
|
||||
t.Errorf("unauthorized request")
|
||||
}
|
||||
if user != baUser || pass != baPass {
|
||||
t.Errorf("wrong creds %q:%q; expected %q:%q",
|
||||
user, pass, baUser, baPass)
|
||||
}
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("expected POST method got %s", r.Method)
|
||||
@@ -43,22 +52,22 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
t.Errorf("expected 1 alert in array got %d", len(a))
|
||||
}
|
||||
if a[0].GeneratorURL != "0/0" {
|
||||
t.Errorf("exptected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
t.Errorf("expected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
}
|
||||
if a[0].Labels["alertname"] != "alert0" {
|
||||
t.Errorf("exptected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
t.Errorf("expected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
}
|
||||
if a[0].StartsAt.IsZero() {
|
||||
t.Errorf("exptected non-zero start time")
|
||||
t.Errorf("expected non-zero start time")
|
||||
}
|
||||
if a[0].EndAt.IsZero() {
|
||||
t.Errorf("exptected non-zero end time")
|
||||
t.Errorf("expected non-zero end time")
|
||||
}
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
am := NewAlertManager(srv.URL, func(alert Alert) string {
|
||||
am := NewAlertManager(srv.URL, baUser, baPass, func(alert Alert) string {
|
||||
return strconv.FormatUint(alert.GroupID, 10) + "/" + strconv.FormatUint(alert.ID, 10)
|
||||
}, srv.Client())
|
||||
if err := am.Send(context.Background(), []Alert{{}, {}}); err == nil {
|
||||
|
||||
45
app/vmalert/notifier/init.go
Normal file
45
app/vmalert/notifier/init.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
)
|
||||
|
||||
var (
|
||||
addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("notifier.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to -notifier.url")
|
||||
tlsCertFile = flagutil.NewArray("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArray("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArray("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flagutil.NewArray("notifier.tlsServerName", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
)
|
||||
|
||||
// Init creates a Notifier object based on provided flags.
|
||||
func Init(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
if len(*addrs) == 0 {
|
||||
return nil, fmt.Errorf("at least one `-notifier.url` must be set")
|
||||
}
|
||||
|
||||
var notifiers []Notifier
|
||||
for i, addr := range *addrs {
|
||||
cert, key := tlsCertFile.GetOptionalArg(i), tlsKeyFile.GetOptionalArg(i)
|
||||
ca, serverName := tlsCAFile.GetOptionalArg(i), tlsServerName.GetOptionalArg(i)
|
||||
tr, err := utils.Transport(addr, cert, key, ca, serverName, tlsInsecureSkipVerify.GetOptionalArg(i))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
user, pass := basicAuthUsername.GetOptionalArg(i), basicAuthPassword.GetOptionalArg(i)
|
||||
am := NewAlertManager(addr, user, pass, gen, &http.Client{Transport: tr})
|
||||
notifiers = append(notifiers, am)
|
||||
}
|
||||
|
||||
return notifiers, nil
|
||||
}
|
||||
@@ -14,21 +14,40 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
html_template "html/template"
|
||||
"math"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
text_template "text/template"
|
||||
"time"
|
||||
|
||||
htmlTpl "html/template"
|
||||
textTpl "text/template"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
var tmplFunc text_template.FuncMap
|
||||
// QueryFn is used to wrap a call to datasource into simple-to-use function
|
||||
// for templating functions.
|
||||
type QueryFn func(query string) ([]datasource.Metric, error)
|
||||
|
||||
// InitTemplateFunc returns template helper functions
|
||||
func funcsWithQuery(query QueryFn) textTpl.FuncMap {
|
||||
fm := make(textTpl.FuncMap)
|
||||
for k, fn := range tmplFunc {
|
||||
fm[k] = fn
|
||||
}
|
||||
fm["query"] = func(q string) ([]datasource.Metric, error) {
|
||||
return query(q)
|
||||
}
|
||||
return fm
|
||||
}
|
||||
|
||||
var tmplFunc textTpl.FuncMap
|
||||
|
||||
// InitTemplateFunc initiates template helper functions
|
||||
func InitTemplateFunc(externalURL *url.URL) {
|
||||
tmplFunc = text_template.FuncMap{
|
||||
tmplFunc = textTpl.FuncMap{
|
||||
"args": func(args ...interface{}) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
for i, a := range args {
|
||||
@@ -40,8 +59,8 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllString(text, repl)
|
||||
},
|
||||
"safeHtml": func(text string) html_template.HTML {
|
||||
return html_template.HTML(text)
|
||||
"safeHtml": func(text string) htmlTpl.HTML {
|
||||
return htmlTpl.HTML(text)
|
||||
},
|
||||
"match": regexp.MatchString,
|
||||
"title": strings.Title,
|
||||
@@ -148,9 +167,33 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||
"queryEscape": func(q string) string {
|
||||
return url.QueryEscape(q)
|
||||
},
|
||||
"crlfEscape": func(q string) string {
|
||||
q = strings.Replace(q, "\n", `\n`, -1)
|
||||
return strings.Replace(q, "\r", `\r`, -1)
|
||||
},
|
||||
"quotesEscape": func(q string) string {
|
||||
return strings.Replace(q, `"`, `\"`, -1)
|
||||
},
|
||||
// query function supposed to be substituted at funcsWithQuery().
|
||||
// it is present here only for validation purposes, when there is no
|
||||
// provided datasource.
|
||||
"query": func(q string) ([]datasource.Metric, error) {
|
||||
// return non-empty slice to pass validation with chained functions in template
|
||||
// see issue #989 for details
|
||||
return []datasource.Metric{{}}, nil
|
||||
},
|
||||
"first": func(metrics []datasource.Metric) (datasource.Metric, error) {
|
||||
if len(metrics) > 0 {
|
||||
return metrics[0], nil
|
||||
}
|
||||
return datasource.Metric{}, errors.New("first() called on vector with no elements")
|
||||
},
|
||||
"label": func(label string, m datasource.Metric) string {
|
||||
return m.Label(label)
|
||||
},
|
||||
"value": func(m datasource.Metric) float64 {
|
||||
return m.Value
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type errGroup struct {
|
||||
errs []string
|
||||
}
|
||||
|
||||
func (eg *errGroup) err() error {
|
||||
if eg == nil || len(eg.errs) == 0 {
|
||||
return nil
|
||||
}
|
||||
return eg
|
||||
}
|
||||
|
||||
func (eg *errGroup) Error() string {
|
||||
return fmt.Sprintf("errors: %s", strings.Join(eg.errs, "\n"))
|
||||
}
|
||||
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
@@ -12,12 +11,14 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// RecordingRule is a Rule that supposed
|
||||
// to evaluate configured Expression and
|
||||
// return TimeSeries as result.
|
||||
type RecordingRule struct {
|
||||
Type datasource.Type
|
||||
RuleID uint64
|
||||
Name string
|
||||
Expr string
|
||||
@@ -32,6 +33,12 @@ type RecordingRule struct {
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
|
||||
metrics *recordingRuleMetrics
|
||||
}
|
||||
|
||||
type recordingRuleMetrics struct {
|
||||
errors *gauge
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
@@ -45,17 +52,34 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
return rr.RuleID
|
||||
}
|
||||
|
||||
func newRecordingRule(gID uint64, cfg config.Rule) *RecordingRule {
|
||||
return &RecordingRule{
|
||||
func newRecordingRule(group *Group, cfg config.Rule) *RecordingRule {
|
||||
rr := &RecordingRule{
|
||||
Type: cfg.Type,
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: gID,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = getOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
rr.mu.Lock()
|
||||
defer rr.mu.Unlock()
|
||||
if rr.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
return rr
|
||||
}
|
||||
|
||||
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels")
|
||||
// Close unregisters rule metrics
|
||||
func (rr *RecordingRule) Close() {
|
||||
metrics.UnregisterMetric(rr.metrics.errors.name)
|
||||
}
|
||||
|
||||
// Exec executes RecordingRule expression via the given Querier.
|
||||
func (rr *RecordingRule) Exec(ctx context.Context, q datasource.Querier, series bool) ([]prompbmarshal.TimeSeries, error) {
|
||||
@@ -63,15 +87,14 @@ func (rr *RecordingRule) Exec(ctx context.Context, q datasource.Querier, series
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
qMetrics, err := q.Query(ctx, rr.Expr)
|
||||
|
||||
qMetrics, err := q.Query(ctx, rr.Expr, rr.Type)
|
||||
rr.mu.Lock()
|
||||
defer rr.mu.Unlock()
|
||||
|
||||
rr.lastExecTime = time.Now()
|
||||
rr.lastExecError = err
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %s", rr.Expr, err)
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
||||
}
|
||||
|
||||
duplicates := make(map[uint64]prompbmarshal.TimeSeries, len(qMetrics))
|
||||
@@ -141,6 +164,7 @@ func (rr *RecordingRule) RuleAPI() APIRecordingRule {
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||
Name: rr.Name,
|
||||
Type: rr.Type.String(),
|
||||
Expression: rr.Expr,
|
||||
LastError: lastErr,
|
||||
LastExec: rr.lastExecTime,
|
||||
|
||||
39
app/vmalert/remoteread/init.go
Normal file
39
app/vmalert/remoteread/init.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package remoteread
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
|
||||
tlsCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
|
||||
tlsKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
|
||||
tlsCAFile = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
|
||||
"By default the server name from -remoteRead.url is used")
|
||||
)
|
||||
|
||||
// Init creates a Querier from provided flag values.
|
||||
// Returns nil if addr flag wasn't set.
|
||||
func Init() (datasource.Querier, error) {
|
||||
if *addr == "" {
|
||||
return nil, nil
|
||||
}
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
c := &http.Client{Transport: tr}
|
||||
return datasource.NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, 0, 0, c), nil
|
||||
}
|
||||
54
app/vmalert/remotewrite/init.go
Normal file
54
app/vmalert/remotewrite/init.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
" and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
|
||||
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
|
||||
tlsKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
|
||||
tlsCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
)
|
||||
|
||||
// Init creates Client object from given flags.
|
||||
// Returns nil if addr flag wasn't set.
|
||||
func Init(ctx context.Context) (*Client, error) {
|
||||
if *addr == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
t, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
return NewClient(ctx, Config{
|
||||
Addr: *addr,
|
||||
Concurrency: *concurrency,
|
||||
MaxQueueSize: *maxQueueSize,
|
||||
MaxBatchSize: *maxBatchSize,
|
||||
FlushInterval: *flushInterval,
|
||||
BasicAuthUser: *basicAuthUsername,
|
||||
BasicAuthPass: *basicAuthPassword,
|
||||
Transport: t,
|
||||
})
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
@@ -61,7 +62,7 @@ const (
|
||||
defaultConcurrency = 4
|
||||
defaultMaxBatchSize = 1e3
|
||||
defaultMaxQueueSize = 1e5
|
||||
defaultFlushInterval = time.Second
|
||||
defaultFlushInterval = 5 * time.Second
|
||||
defaultWriteTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
@@ -85,6 +86,9 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
if cfg.WriteTimeout == 0 {
|
||||
cfg.WriteTimeout = defaultWriteTimeout
|
||||
}
|
||||
if cfg.Transport == nil {
|
||||
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
|
||||
}
|
||||
c := &Client{
|
||||
c: &http.Client{
|
||||
Timeout: cfg.WriteTimeout,
|
||||
@@ -95,6 +99,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
baPass: cfg.BasicAuthPass,
|
||||
flushInterval: cfg.FlushInterval,
|
||||
maxBatchSize: cfg.MaxBatchSize,
|
||||
maxQueueSize: cfg.MaxQueueSize,
|
||||
doneCh: make(chan struct{}),
|
||||
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
@@ -137,14 +142,11 @@ func (c *Client) Close() error {
|
||||
|
||||
func (c *Client) run(ctx context.Context) {
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
wr := prompbmarshal.WriteRequest{}
|
||||
wr := &prompbmarshal.WriteRequest{}
|
||||
shutdown := func() {
|
||||
for ts := range c.input {
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
}
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||
c.flush(lastCtx, wr)
|
||||
cancel()
|
||||
@@ -163,44 +165,82 @@ func (c *Client) run(ctx context.Context) {
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
case ts := <-c.input:
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (c *Client) flush(ctx context.Context, wr prompbmarshal.WriteRequest) {
|
||||
var (
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||
)
|
||||
|
||||
// flush is a blocking function that marshals WriteRequest and sends
|
||||
// it to remote write endpoint. Flush performs limited amount of retries
|
||||
// if request fails.
|
||||
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
defer prompbmarshal.ResetWriteRequest(wr)
|
||||
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||
return
|
||||
}
|
||||
req, err := http.NewRequest("POST", c.addr, bytes.NewReader(snappy.Encode(nil, data)))
|
||||
|
||||
const attempts = 5
|
||||
b := snappy.Encode(nil, data)
|
||||
for i := 0; i < attempts; i++ {
|
||||
err := c.send(ctx, b)
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(b))
|
||||
return
|
||||
}
|
||||
|
||||
logger.Errorf("attempt %d to send request failed: %s", i+1, err)
|
||||
// sleeping to avoid remote db hammering
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
logger.Errorf("all %d attempts to send request failed - dropping %d timeseries",
|
||||
attempts, len(wr.Timeseries))
|
||||
}
|
||||
|
||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
r := bytes.NewReader(data)
|
||||
req, err := http.NewRequest("POST", c.addr, r)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create new HTTP request: %s", err)
|
||||
return
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
if c.baPass != "" {
|
||||
req.SetBasicAuth(c.baUser, c.baPass)
|
||||
}
|
||||
resp, err := c.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
logger.Errorf("error getting response from %s:%s", req.URL, err)
|
||||
return
|
||||
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
||||
req.URL, err, len(data), r.Size())
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusNoContent {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
logger.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
|
||||
return
|
||||
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL, body)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
102
app/vmalert/remotewrite/remotewrite_test.go
Normal file
102
app/vmalert/remotewrite/remotewrite_test.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func TestClient_Push(t *testing.T) {
|
||||
testSrv := newRWServer()
|
||||
cfg := Config{
|
||||
Addr: testSrv.URL,
|
||||
MaxBatchSize: 100,
|
||||
}
|
||||
client, err := NewClient(context.Background(), cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create client: %s", err)
|
||||
}
|
||||
const rowsN = 1e4
|
||||
var sent int
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompbmarshal.TimeSeries{
|
||||
Samples: []prompbmarshal.Sample{{
|
||||
Value: rand.Float64(),
|
||||
Timestamp: time.Now().Unix(),
|
||||
}},
|
||||
}
|
||||
err := client.Push(s)
|
||||
if err == nil {
|
||||
sent++
|
||||
}
|
||||
}
|
||||
if sent == 0 {
|
||||
t.Fatalf("0 series sent")
|
||||
}
|
||||
if err := client.Close(); err != nil {
|
||||
t.Fatalf("failed to close client: %s", err)
|
||||
}
|
||||
got := testSrv.accepted()
|
||||
if got != sent {
|
||||
t.Fatalf("expected to have %d series; got %d", sent, got)
|
||||
}
|
||||
}
|
||||
|
||||
func newRWServer() *rwServer {
|
||||
rw := &rwServer{}
|
||||
rw.Server = httptest.NewServer(http.HandlerFunc(rw.handler))
|
||||
return rw
|
||||
}
|
||||
|
||||
type rwServer struct {
|
||||
// WARN: ordering of fields is important for alignment!
|
||||
// see https://golang.org/pkg/sync/atomic/#pkg-note-BUG
|
||||
acceptedRows uint64
|
||||
*httptest.Server
|
||||
}
|
||||
|
||||
func (rw *rwServer) accepted() int {
|
||||
return int(atomic.LoadUint64(&rw.acceptedRows))
|
||||
}
|
||||
|
||||
func (rw *rwServer) err(w http.ResponseWriter, err error) {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
w.Write([]byte(err.Error()))
|
||||
}
|
||||
|
||||
func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
rw.err(w, fmt.Errorf("bad method %q", r.Method))
|
||||
return
|
||||
}
|
||||
data, err := ioutil.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
rw.err(w, fmt.Errorf("body read err: %w", err))
|
||||
return
|
||||
}
|
||||
defer func() { _ = r.Body.Close() }()
|
||||
|
||||
b, err := snappy.Decode(nil, data)
|
||||
if err != nil {
|
||||
rw.err(w, fmt.Errorf("decode err: %w", err))
|
||||
return
|
||||
}
|
||||
wr := &prompb.WriteRequest{}
|
||||
if err := wr.Unmarshal(b); err != nil {
|
||||
rw.err(w, fmt.Errorf("unmarhsal err: %w", err))
|
||||
return
|
||||
}
|
||||
atomic.AddUint64(&rw.acceptedRows, uint64(len(wr.Timeseries)))
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
@@ -21,4 +22,9 @@ type Rule interface {
|
||||
// UpdateWith performs modification of current Rule
|
||||
// with fields of the given Rule.
|
||||
UpdateWith(Rule) error
|
||||
// Close performs the shutdown procedures for rule
|
||||
// such as metrics unregister
|
||||
Close()
|
||||
}
|
||||
|
||||
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels")
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
|
||||
|
||||
43
app/vmalert/utils/err_group.go
Normal file
43
app/vmalert/utils/err_group.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrGroup accumulates multiple errors
|
||||
// and produces single error message.
|
||||
type ErrGroup struct {
|
||||
errs []error
|
||||
}
|
||||
|
||||
// Add adds a new error to group.
|
||||
// Isn't thread-safe.
|
||||
func (eg *ErrGroup) Add(err error) {
|
||||
eg.errs = append(eg.errs, err)
|
||||
}
|
||||
|
||||
// Err checks if group contains at least
|
||||
// one error.
|
||||
func (eg *ErrGroup) Err() error {
|
||||
if eg == nil || len(eg.errs) == 0 {
|
||||
return nil
|
||||
}
|
||||
return eg
|
||||
}
|
||||
|
||||
// Error satisfies Error interface
|
||||
func (eg *ErrGroup) Error() string {
|
||||
if len(eg.errs) == 0 {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||
for i, err := range eg.errs {
|
||||
b.WriteString(err.Error())
|
||||
if i != len(eg.errs)-1 {
|
||||
b.WriteString("\n")
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
38
app/vmalert/utils/err_group_test.go
Normal file
38
app/vmalert/utils/err_group_test.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestErrGroup(t *testing.T) {
|
||||
testCases := []struct {
|
||||
errs []error
|
||||
exp string
|
||||
}{
|
||||
{nil, ""},
|
||||
{[]error{errors.New("timeout")}, "errors(1): timeout"},
|
||||
{
|
||||
[]error{errors.New("timeout"), errors.New("deadline")},
|
||||
"errors(2): timeout\ndeadline",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
eg := new(ErrGroup)
|
||||
for _, err := range tc.errs {
|
||||
eg.Add(err)
|
||||
}
|
||||
if len(tc.errs) == 0 {
|
||||
if eg.Err() != nil {
|
||||
t.Fatalf("expected to get nil error")
|
||||
}
|
||||
continue
|
||||
}
|
||||
if eg.Err() == nil {
|
||||
t.Fatalf("expected to get non-nil error")
|
||||
}
|
||||
if eg.Error() != tc.exp {
|
||||
t.Fatalf("expected to have: \n%q\ngot:\n%q", tc.exp, eg.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
58
app/vmalert/utils/tls.go
Normal file
58
app/vmalert/utils/tls.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Transport creates http.Transport object based on provided URL.
|
||||
// Returns Transport with TLS configuration if URL contains `https` prefix
|
||||
func Transport(URL, certFile, keyFile, CAFile, serverName string, insecureSkipVerify bool) (*http.Transport, error) {
|
||||
t := http.DefaultTransport.(*http.Transport).Clone()
|
||||
if !strings.HasPrefix(URL, "https") {
|
||||
return t, nil
|
||||
}
|
||||
tlsCfg, err := TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.TLSClientConfig = tlsCfg
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// TLSConfig creates tls.Config object from provided arguments
|
||||
func TLSConfig(certFile, keyFile, CAFile, serverName string, insecureSkipVerify bool) (*tls.Config, error) {
|
||||
var certs []tls.Certificate
|
||||
if certFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(certFile, keyFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %w", certFile, keyFile, err)
|
||||
}
|
||||
|
||||
certs = []tls.Certificate{cert}
|
||||
}
|
||||
|
||||
var rootCAs *x509.CertPool
|
||||
if CAFile != "" {
|
||||
pem, err := ioutil.ReadFile(CAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read `ca_file` %q: %w", CAFile, err)
|
||||
}
|
||||
|
||||
rootCAs = x509.NewCertPool()
|
||||
if !rootCAs.AppendCertsFromPEM(pem) {
|
||||
return nil, fmt.Errorf("cannot parse data from `ca_file` %q", CAFile)
|
||||
}
|
||||
}
|
||||
|
||||
return &tls.Config{
|
||||
Certificates: certs,
|
||||
InsecureSkipVerify: insecureSkipVerify,
|
||||
RootCAs: rootCAs,
|
||||
ServerName: serverName,
|
||||
}, nil
|
||||
}
|
||||
52
app/vmalert/utils/tls_test.go
Normal file
52
app/vmalert/utils/tls_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package utils
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTLSConfig(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
serverName = "test"
|
||||
insecureSkipVerify = true
|
||||
tlsCfg, err := TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tlsCfg == nil {
|
||||
t.Errorf("expected tlsConfig to be set, got nil")
|
||||
}
|
||||
if tlsCfg.ServerName != serverName {
|
||||
t.Errorf("unexpected ServerName, want %s, got %s", serverName, tlsCfg.ServerName)
|
||||
}
|
||||
if tlsCfg.InsecureSkipVerify != insecureSkipVerify {
|
||||
t.Errorf("unexpected InsecureSkipVerify, want %v, got %v", insecureSkipVerify, tlsCfg.InsecureSkipVerify)
|
||||
}
|
||||
certFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected keypair error, got nil")
|
||||
}
|
||||
certFile = ""
|
||||
CAFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected read error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransport(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
URL := "http://victoriametrics.com"
|
||||
_, err := Transport(URL, certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
URL = "https://victoriametrics.com"
|
||||
tr, err := Transport(URL, certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr.TLSClientConfig == nil {
|
||||
t.Errorf("expected TLSClientConfig to be set, got nil")
|
||||
}
|
||||
}
|
||||
@@ -27,7 +27,6 @@ var pathList = [][]string{
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
resph := responseHandler{w}
|
||||
switch r.URL.Path {
|
||||
case "/":
|
||||
for _, path := range pathList {
|
||||
@@ -36,10 +35,22 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/api/v1/groups":
|
||||
resph.handle(rh.listGroups())
|
||||
data, err := rh.listGroups()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/api/v1/alerts":
|
||||
resph.handle(rh.listAlerts())
|
||||
data, err := rh.listAlerts()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/-/reload":
|
||||
logger.Infof("api config reload was called, sending sighup")
|
||||
@@ -47,12 +58,18 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
default:
|
||||
if !strings.HasSuffix(r.URL.Path, "/status") {
|
||||
return false
|
||||
}
|
||||
// /api/v1/<groupName>/<alertID>/status
|
||||
if strings.HasSuffix(r.URL.Path, "/status") {
|
||||
resph.handle(rh.alert(r.URL.Path))
|
||||
data, err := rh.alert(r.URL.Path)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
w.Write(data)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +97,7 @@ func (rh *requestHandler) listGroups() ([]byte, error) {
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
@@ -117,7 +134,7 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
@@ -138,11 +155,11 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
|
||||
groupID, err := uint64FromPath(parts[0])
|
||||
if err != nil {
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %s`, err))
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %w`, err))
|
||||
}
|
||||
alertID, err := uint64FromPath(parts[1])
|
||||
if err != nil {
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse alertID: %s`, err))
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse alertID: %w`, err))
|
||||
}
|
||||
resp, err := rh.m.AlertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -151,18 +168,6 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// responseHandler wrapper on http.ResponseWriter with sugar
|
||||
type responseHandler struct{ http.ResponseWriter }
|
||||
|
||||
func (w responseHandler) handle(b []byte, err error) {
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, "%s", err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(b)
|
||||
}
|
||||
|
||||
func uint64FromPath(path string) (uint64, error) {
|
||||
s := strings.TrimRight(path, "/")
|
||||
return strconv.ParseUint(s, 10, 0)
|
||||
|
||||
@@ -21,6 +21,7 @@ type APIAlert struct {
|
||||
// APIGroup represents Group for WEB view
|
||||
type APIGroup struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
ID string `json:"id"`
|
||||
File string `json:"file"`
|
||||
Interval string `json:"interval"`
|
||||
@@ -33,6 +34,7 @@ type APIGroup struct {
|
||||
type APIAlertingRule struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
GroupID string `json:"group_id"`
|
||||
Expression string `json:"expression"`
|
||||
For string `json:"for"`
|
||||
@@ -46,6 +48,7 @@ type APIAlertingRule struct {
|
||||
type APIRecordingRule struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
GroupID string `json:"group_id"`
|
||||
Expression string `json:"expression"`
|
||||
LastError string `json:"last_error"`
|
||||
|
||||
@@ -58,19 +58,22 @@ run-vmauth:
|
||||
$(MAKE) run-via-docker
|
||||
|
||||
vmauth-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-amd64 ./app/vmauth
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm64 ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-ppc64le ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-386 ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-local-with-goarch:
|
||||
APP_NAME=vmauth $(MAKE) app-local-with-goarch
|
||||
|
||||
vmauth-pure:
|
||||
APP_NAME=vmauth $(MAKE) app-local-pure
|
||||
|
||||
@@ -5,7 +5,7 @@ It reads username and password from [Basic Auth headers](https://en.wikipedia.or
|
||||
matches them against configs pointed by `-auth.config` command-line flag and proxies incoming HTTP requests to the configured per-user `url_prefix` on successful match.
|
||||
|
||||
|
||||
### Quick start
|
||||
## Quick start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
|
||||
and pass the following flag to `vmauth` binary in order to start authorizing and routing requests:
|
||||
@@ -26,7 +26,7 @@ Pass `-help` to `vmauth` in order to see all the supported command-line flags wi
|
||||
Feel free [contacting us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML, accounting, limits, etc.
|
||||
|
||||
|
||||
### Auth config
|
||||
## Auth config
|
||||
|
||||
Auth config is represented in the following simple `yml` format:
|
||||
|
||||
@@ -46,7 +46,7 @@ users:
|
||||
url_prefix: "http://localhost:8428"
|
||||
|
||||
# The user for querying account 123 in VictoriaMetrics cluster
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vmselect:8481/select/123/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://vmselect:8481/select/123/prometheus/api/v1/select
|
||||
@@ -55,7 +55,7 @@ users:
|
||||
url_prefix: "http://vmselect:8481/select/123/prometheus"
|
||||
|
||||
# The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the reuqests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vminsert:8480/insert/42/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/write is routed to http://vminsert:8480/insert/42/prometheus/api/v1/write
|
||||
@@ -64,8 +64,11 @@ users:
|
||||
url_prefix: "http://vminsert:8480/insert/42/prometheus"
|
||||
```
|
||||
|
||||
The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
This may be useful for passing secrets to the config.
|
||||
|
||||
### Security
|
||||
|
||||
## Security
|
||||
|
||||
Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable https. This can be done by passing the following `-tls*` command-line flags to `vmauth`:
|
||||
|
||||
@@ -81,44 +84,44 @@ Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable
|
||||
Alternatively, [https termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.
|
||||
|
||||
|
||||
### Monitoring
|
||||
## Monitoring
|
||||
|
||||
`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
either via [vmagent](https://victoriametrics.github.io/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmauth` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmauth` from the root folder of the repository.
|
||||
It builds `vmauth` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmauth-prod` from the root folder of the repository.
|
||||
It builds `vmauth-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmauth
|
||||
ROOT_IMAGE=scratch make package-vmauth
|
||||
```
|
||||
|
||||
|
||||
### Profiling
|
||||
## Profiling
|
||||
|
||||
`vmauth` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
@@ -137,3 +140,68 @@ curl -s http://<vmauth-host>:8427/debug/pprof/profile > cpu.pprof
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
|
||||
|
||||
## Advanced usage
|
||||
|
||||
Pass `-help` command-line arg to `vmauth` in order to see all the configuration options:
|
||||
|
||||
```
|
||||
./vmauth -help
|
||||
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://victoriametrics.github.io/vmauth.html .
|
||||
|
||||
-auth.config string
|
||||
Path to auth config. See https://victoriametrics.github.io/vmauth.html for details on the format of this auth config
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help spreading incoming load among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this dealy the servier returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8427")
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-memory.allowedBytes value
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -16,7 +17,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
authConfigPath = flag.String("auth.config", "", "Path to auth config. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md "+
|
||||
authConfigPath = flag.String("auth.config", "", "Path to auth config. See https://victoriametrics.github.io/vmauth.html "+
|
||||
"for details on the format of this auth config")
|
||||
)
|
||||
|
||||
@@ -82,20 +83,21 @@ var stopCh chan struct{}
|
||||
func readAuthConfig(path string) (map[string]*UserInfo, error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
}
|
||||
m, err := parseAuthConfig(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse %q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot parse %q: %w", path, err)
|
||||
}
|
||||
logger.Infof("Loaded information about %d users from %q", len(m), path)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
data = envtemplate.Replace(data)
|
||||
var ac AuthConfig
|
||||
if err := yaml.UnmarshalStrict(data, &ac); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %s", err)
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %w", err)
|
||||
}
|
||||
uis := ac.Users
|
||||
if len(uis) == 0 {
|
||||
@@ -115,7 +117,7 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
// Validate urlPrefix
|
||||
target, err := url.Parse(urlPrefix)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid `url_prefix: %q`: %s", urlPrefix, err)
|
||||
return nil, fmt.Errorf("invalid `url_prefix: %q`: %w", urlPrefix, err)
|
||||
}
|
||||
if target.Scheme != "http" && target.Scheme != "https" {
|
||||
return nil, fmt.Errorf("unsupported scheme for `url_prefix: %q`: %q; must be `http` or `https`", urlPrefix, target.Scheme)
|
||||
|
||||
@@ -12,7 +12,7 @@ users:
|
||||
url_prefix: "http://localhost:8428"
|
||||
|
||||
# The user for querying account 123 in VictoriaMetrics cluster
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vmselect:8481/select/123/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://vmselect:8481/select/123/prometheus/api/v1/select
|
||||
@@ -21,7 +21,7 @@ users:
|
||||
url_prefix: "http://vmselect:8481/select/123/prometheus"
|
||||
|
||||
# The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
|
||||
# See https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the reuqests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://vminsert:8480/insert/42/prometheus .
|
||||
# For example, http://vmauth:8427/api/v1/write is routed to http://vminsert:8480/insert/42/prometheus/api/v1/write
|
||||
|
||||
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
@@ -11,6 +10,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -49,20 +49,21 @@ func main() {
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
username, password, ok := r.BasicAuth()
|
||||
if !ok {
|
||||
httpserver.Errorf(w, "Missing `Authorization: Basic *` header")
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
|
||||
http.Error(w, "missing `Authorization: Basic *` header", http.StatusUnauthorized)
|
||||
return true
|
||||
}
|
||||
ac := authConfig.Load().(map[string]*UserInfo)
|
||||
info := ac[username]
|
||||
if info == nil || info.Password != password {
|
||||
httpserver.Errorf(w, "Cannot find the provided username %q or password in config", username)
|
||||
httpserver.Errorf(w, r, "cannot find the provided username %q or password in config", username)
|
||||
return true
|
||||
}
|
||||
info.requests.Inc()
|
||||
|
||||
targetURL := createTargetURL(info.URLPrefix, r.URL)
|
||||
if _, err := url.Parse(targetURL); err != nil {
|
||||
httpserver.Errorf(w, "Invalid targetURL=%q: %s", targetURL, err)
|
||||
httpserver.Errorf(w, r, "invalid targetURL=%q: %s", targetURL, err)
|
||||
return true
|
||||
}
|
||||
r.Header.Set("vm-target-url", targetURL)
|
||||
@@ -95,10 +96,7 @@ func usage() {
|
||||
const s = `
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md .
|
||||
See the docs at https://victoriametrics.github.io/vmauth.html .
|
||||
`
|
||||
|
||||
f := flag.CommandLine.Output()
|
||||
fmt.Fprintf(f, "%s\n", s)
|
||||
flag.PrintDefaults()
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -51,20 +51,23 @@ package-vmbackup-386:
|
||||
publish-vmbackup:
|
||||
APP_NAME=vmbackup $(MAKE) publish-via-docker
|
||||
|
||||
vmbackup-pure:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-pure
|
||||
|
||||
vmbackup-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-amd64 ./app/vmbackup
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm64 ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-ppc64le ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-386 ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-local-with-goarch:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-with-goarch
|
||||
|
||||
vmbackup-pure:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-pure
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
## vmbackup
|
||||
|
||||
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
|
||||
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
|
||||
Supported storage systems for backups:
|
||||
|
||||
* [GCS](https://cloud.google.com/storage/). Example: `gcs://<bucket>/<path/to/backup>`
|
||||
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/docs/mimic/radosgw/s3/) or [Swift](https://www.swiftstack.com/docs/admin/middleware/s3_middleware.html). See `-customS3Endpoint` command-line flag.
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/docs/mimic/radosgw/s3/) or [Swift](https://www.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `fs://</absolute/path/to/backup>`
|
||||
|
||||
Incremental backups and full backups are supported. Incremental backups are created automatically if the destination path already contains data from the previous backup.
|
||||
`vmbackup` supports incremental and full backups. Incremental backups created automatically if the destination path already contains data from the previous backup.
|
||||
Full backups can be sped up with `-origin` pointing to already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
|
||||
data between the existing backup and new backup. This saves time and costs on data transfer.
|
||||
data between the existing backup and new backup. It saves time and costs on data transfer.
|
||||
|
||||
Backup process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmbackup` with the same args.
|
||||
|
||||
Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md).
|
||||
Backed up data can be restored with [vmrestore](https://victoriametrics.github.io/vmrestore.html).
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
@@ -23,9 +23,9 @@ See also [vmbackuper](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
|
||||
### Use cases
|
||||
## Use cases
|
||||
|
||||
#### Regular backups
|
||||
### Regular backups
|
||||
|
||||
Regular backup can be performed with the following command:
|
||||
|
||||
@@ -34,13 +34,13 @@ vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-
|
||||
```
|
||||
|
||||
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
|
||||
There is no need to stop VictoriaMetrics for creating backups, since they are performed from immutable [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
|
||||
* `<local-snapshot>` is the snapshot to backup. See [how to create instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
|
||||
* `<bucket>` is already existing name for [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
|
||||
There is no need to stop VictoriaMetrics for creating backups, since they are performed from immutable [instant snapshots](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
* `<local-snapshot>` is the snapshot to back up. See [how to create instant snapshots](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
* `<bucket>` is an already existing name for [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
|
||||
* `<path/to/new/backup>` is the destination path where new backup will be placed.
|
||||
|
||||
|
||||
#### Regular backups with server-side copy from existing backup
|
||||
### Regular backups with server-side copy from existing backup
|
||||
|
||||
If the destination GCS bucket already contains the previous backup at `-origin` path, then new backup can be sped up
|
||||
with the following command:
|
||||
@@ -49,20 +49,20 @@ with the following command:
|
||||
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup> -origin=gcs://<bucket>/<path/to/existing/backup>
|
||||
```
|
||||
|
||||
This saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
|
||||
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
|
||||
|
||||
|
||||
#### Incremental backups
|
||||
### Incremental backups
|
||||
|
||||
Incremental backups are performed if `-dst` points to already existing backup. In this case only new data is uploaded to remote storage.
|
||||
This saves time and network bandwidth costs when working with big backups:
|
||||
Incremental backups performed if `-dst` points to an already existing backup. In this case only new data uploaded to remote storage.
|
||||
It saves time and network bandwidth costs when working with big backups:
|
||||
|
||||
```
|
||||
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/existing/backup>
|
||||
```
|
||||
|
||||
|
||||
#### Smart backups
|
||||
### Smart backups
|
||||
|
||||
Smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
|
||||
|
||||
@@ -72,7 +72,7 @@ Smart backups mean storing full daily backups into `YYYYMMDD` folders and creati
|
||||
vmbackup -snapshotName=<latest-snapshot> -dst=gcs://<bucket>/latest
|
||||
```
|
||||
|
||||
Where `<latest-snapshot>` is the latest [snapshot](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
|
||||
Where `<latest-snapshot>` is the latest [snapshot](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
The command will upload only changed data to `gcs://<bucket>/latest`.
|
||||
|
||||
* Run the following command once a day:
|
||||
@@ -92,7 +92,7 @@ Do not forget removing old snapshots and backups when they are no longer needed
|
||||
See also [vmbackuper tool](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for automating smart backups.
|
||||
|
||||
|
||||
### How does it work?
|
||||
## How does it work?
|
||||
|
||||
The backup algorithm is the following:
|
||||
|
||||
@@ -100,16 +100,16 @@ The backup algorithm is the following:
|
||||
2. Determine files in `-dst`, which are missing in `-snapshotName`, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
|
||||
3. Determine files from `-snapshotName`, which are missing in `-dst`. These are usually small new files and bigger merged files.
|
||||
4. Determine files from step 3, which exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
|
||||
This are usually the biggest and the oldest files, which are shared between backups.
|
||||
5. Upload the remaining files from setp 3 from `-snapshotName` to `-dst`.
|
||||
These are usually the biggest and the oldest files, which are shared between backups.
|
||||
5. Upload the remaining files from step 3 from `-snapshotName` to `-dst`.
|
||||
|
||||
The algorithm splits source files into 100MB chunks in the backup. Each chunk is stored as a separate file in the backup.
|
||||
The algorithm splits source files into 100 MB chunks in the backup. Each chunk stored as a separate file in the backup.
|
||||
Such splitting minimizes the amounts of data to re-transfer after temporary errors.
|
||||
|
||||
`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
|
||||
|
||||
- All the files in the snapshot are immutable.
|
||||
- Old files are periodically merged into new files.
|
||||
- Old files periodically merged into new files.
|
||||
- Smaller files have higher probability to be merged.
|
||||
- Consecutive snapshots share many identical files.
|
||||
|
||||
@@ -118,18 +118,56 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
`vmbackup` can work improperly or slowly when these properties are violated.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
## Troubleshooting
|
||||
|
||||
* If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
|
||||
* If `vmbackup` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
|
||||
* If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process.
|
||||
* Backups created from [single-node VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md) cannot be restored
|
||||
at [cluster VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md) and vice versa.
|
||||
* Backups created from [single-node VictoriaMetrics](https://victoriametrics.github.io/Single-server-VictoriaMetrics.html) cannot be restored
|
||||
at [cluster VictoriaMetrics](https://victoriametrics.github.io/Cluster-VictoriaMetrics.html) and vice versa.
|
||||
|
||||
|
||||
### Advanced usage
|
||||
## Advanced usage
|
||||
|
||||
Run `vmbackup -help` in order to see all the available options:
|
||||
|
||||
* Obtaining credentials from a file.
|
||||
|
||||
Add flag `-credsFilePath=/etc/credentials` with the following content:
|
||||
|
||||
for s3 (aws, minio or other s3 compatible storages):
|
||||
```bash
|
||||
[default]
|
||||
aws_access_key_id=theaccesskey
|
||||
aws_secret_access_key=thesecretaccesskeyvalue
|
||||
```
|
||||
|
||||
for gce cloud storage:
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "project-id",
|
||||
"private_key_id": "key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "service-account-email",
|
||||
"client_id": "client-id",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://accounts.google.com/o/oauth2/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account-email"
|
||||
}
|
||||
```
|
||||
|
||||
* Usage with s3 custom url endpoint. It is possible to use `vmbackup` with s3 compatible storages like minio, cloudian, etc.
|
||||
You have to add a custom url endpoint via flag:
|
||||
```
|
||||
# for minio
|
||||
-customS3Endpoint=http://localhost:9000
|
||||
|
||||
# for aws gov region
|
||||
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
|
||||
```
|
||||
|
||||
* Run `vmbackup -help` in order to see all the available options:
|
||||
|
||||
```
|
||||
-concurrency int
|
||||
@@ -138,7 +176,7 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-configProfile string
|
||||
Profile name for S3 configs (default "default")
|
||||
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used
|
||||
-credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
@@ -152,25 +190,31 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
-maxBytesPerSecond value
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedBytes value
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
Supports the following optional suffixes for values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-origin string
|
||||
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
|
||||
-snapshot.createURL string
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup.Example: http://victoriametrics:8428/snaphsot/create
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snaphsot/create
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted.Example: http://victoriametrics:8428/snaphsot/delete
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. All created snaphosts will be automatically deleted. Example: http://victoriametrics:8428/snaphsot/delete
|
||||
-snapshotName string
|
||||
Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
|
||||
Name for the snapshot to backup. See https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots
|
||||
-storageDataPath string
|
||||
Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
|
||||
-version
|
||||
@@ -178,32 +222,32 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
```
|
||||
|
||||
|
||||
### How to build from sources
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmbackup` from the root folder of the repository.
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmbackup-prod` from the root folder of the repository.
|
||||
It builds `vmbackup-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Building docker images
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmbackup
|
||||
ROOT_IMAGE=scratch make package-vmbackup
|
||||
```
|
||||
|
||||
@@ -10,24 +10,26 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fsnil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
|
||||
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
|
||||
snapshotCreateURL = flag.String("snapshot.createURL", "", "VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup."+
|
||||
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://victoriametrics.github.io/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots")
|
||||
snapshotCreateURL = flag.String("snapshot.createURL", "", "VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. "+
|
||||
"Example: http://victoriametrics:8428/snaphsot/create")
|
||||
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted."+
|
||||
"Example: http://victoriametrics:8428/snaphsot/delete")
|
||||
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. "+
|
||||
"All created snaphosts will be automatically deleted. Example: http://victoriametrics:8428/snaphsot/delete")
|
||||
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
|
||||
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
|
||||
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
|
||||
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
|
||||
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce backup duration")
|
||||
maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum upload speed. There is no limit if it is set to 0")
|
||||
maxBytesPerSecond = flagutil.NewBytes("maxBytesPerSecond", 0, "The maximum upload speed. There is no limit if it is set to 0")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -36,9 +38,10 @@ func main() {
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
if len(*snapshotCreateURL) > 0 {
|
||||
logger.Infof("%s", "Snapshots enabled")
|
||||
logger.Infof("Snapshots enabled")
|
||||
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
|
||||
if len(*snapshotDeleteURL) <= 0 {
|
||||
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
|
||||
@@ -50,17 +53,17 @@ func main() {
|
||||
|
||||
name, err := snapshot.Create(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("%s", err)
|
||||
logger.Fatalf("cannot create snapshot: %s", err)
|
||||
}
|
||||
err = flag.Set("snapshotName", name)
|
||||
if err != nil {
|
||||
logger.Fatalf("Failed to set snapshotName flag: %v", err)
|
||||
logger.Fatalf("cannot set snapshotName flag: %v", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := snapshot.Delete(*snapshotDeleteURL, name)
|
||||
if err != nil {
|
||||
logger.Fatalf("%s", err)
|
||||
logger.Fatalf("cannot delete snapshot: %s", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -86,6 +89,9 @@ func main() {
|
||||
if err := a.Run(); err != nil {
|
||||
logger.Fatalf("cannot create backup: %s", err)
|
||||
}
|
||||
srcFS.MustStop()
|
||||
dstFS.MustStop()
|
||||
originFS.MustStop()
|
||||
}
|
||||
|
||||
func usage() {
|
||||
@@ -93,12 +99,9 @@ func usage() {
|
||||
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3
|
||||
or local filesystem. Backed up data can be restored with vmrestore.
|
||||
|
||||
See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md .
|
||||
See the docs at https://victoriametrics.github.io/vbackup.html .
|
||||
`
|
||||
|
||||
f := flag.CommandLine.Output()
|
||||
fmt.Fprintf(f, "%s\n", s)
|
||||
flag.PrintDefaults()
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
func newSrcFS() (*fslocal.FS, error) {
|
||||
@@ -110,12 +113,12 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
// Verify the snapshot exists.
|
||||
f, err := os.Open(snapshotPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot open snapshot at %q: %s", snapshotPath, err)
|
||||
return nil, fmt.Errorf("cannot open snapshot at %q: %w", snapshotPath, err)
|
||||
}
|
||||
fi, err := f.Stat()
|
||||
_ = f.Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot stat %q: %s", snapshotPath, err)
|
||||
return nil, fmt.Errorf("cannot stat %q: %w", snapshotPath, err)
|
||||
}
|
||||
if !fi.IsDir() {
|
||||
return nil, fmt.Errorf("snapshot %q must be a directory", snapshotPath)
|
||||
@@ -123,10 +126,10 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
|
||||
fs := &fslocal.FS{
|
||||
Dir: snapshotPath,
|
||||
MaxBytesPerSecond: *maxBytesPerSecond,
|
||||
MaxBytesPerSecond: maxBytesPerSecond.N,
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize fs: %s", err)
|
||||
return nil, fmt.Errorf("cannot initialize fs: %w", err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
@@ -134,18 +137,18 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
func newDstFS() (common.RemoteFS, error) {
|
||||
fs, err := actions.NewRemoteFS(*dst)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %s", *dst, err)
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
func newOriginFS() (common.RemoteFS, error) {
|
||||
func newOriginFS() (common.OriginFS, error) {
|
||||
if len(*origin) == 0 {
|
||||
return nil, nil
|
||||
return &fsnil.FS{}, nil
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(*origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %s", *origin, err)
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
@@ -20,26 +20,27 @@ type snapshot struct {
|
||||
// Create creates a snapshot and the provided api endpoint and returns
|
||||
// the snapshot name
|
||||
func Create(createSnapshotURL string) (string, error) {
|
||||
logger.Infof("%s", "Creating snapshot")
|
||||
logger.Infof("Creating snapshot")
|
||||
u, err := url.Parse(createSnapshotURL)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
resp, err := http.Get(u.String())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("unexpected status code returned from %q; expecting %d; got %d; response body: %q", createSnapshotURL, resp.StatusCode, http.StatusOK, body)
|
||||
}
|
||||
|
||||
snap := snapshot{}
|
||||
err = json.Unmarshal(body, &snap)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("cannot parse JSON response from %q: %w; response body: %q", createSnapshotURL, err, body)
|
||||
}
|
||||
|
||||
if snap.Status == "ok" {
|
||||
@@ -58,26 +59,26 @@ func Delete(deleteSnapshotURL string, snapshotName string) error {
|
||||
formData := url.Values{
|
||||
"snapshot": {snapshotName},
|
||||
}
|
||||
|
||||
u, err := url.Parse(deleteSnapshotURL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := http.PostForm(u.String(), formData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("unexpected status code returned from %q; expecting %d; got %d; response body: %q", deleteSnapshotURL, resp.StatusCode, http.StatusOK, body)
|
||||
}
|
||||
|
||||
snap := snapshot{}
|
||||
err = json.Unmarshal(body, &snap)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("cannot parse JSON response from %q: %w; response body: %q", deleteSnapshotURL, err, body)
|
||||
}
|
||||
|
||||
if snap.Status == "ok" {
|
||||
|
||||
73
app/vmctl/Makefile
Normal file
73
app/vmctl/Makefile
Normal file
@@ -0,0 +1,73 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
vmctl:
|
||||
APP_NAME=vmctl $(MAKE) app-local
|
||||
|
||||
vmctl-race:
|
||||
APP_NAME=vmctl RACE=-race $(MAKE) app-local
|
||||
|
||||
vmctl-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker
|
||||
|
||||
vmctl-pure-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-pure
|
||||
|
||||
vmctl-amd64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-amd64
|
||||
|
||||
vmctl-arm-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-arm
|
||||
|
||||
vmctl-arm64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-arm64
|
||||
|
||||
vmctl-ppc64le-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-ppc64le
|
||||
|
||||
vmctl-386-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-386
|
||||
|
||||
package-vmctl:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker
|
||||
|
||||
package-vmctl-pure:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-pure
|
||||
|
||||
package-vmctl-amd64:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-amd64
|
||||
|
||||
package-vmctl-arm:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-arm
|
||||
|
||||
package-vmctl-arm64:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-arm64
|
||||
|
||||
package-vmctl-ppc64le:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-ppc64le
|
||||
|
||||
package-vmctl-386:
|
||||
APP_NAME=vmctl $(MAKE) package-via-docker-386
|
||||
|
||||
publish-vmctl:
|
||||
APP_NAME=vmctl $(MAKE) publish-via-docker
|
||||
|
||||
vmctl-amd64:
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-arm:
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-arm64:
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-ppc64le:
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-386:
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmctl-local-with-goarch
|
||||
|
||||
vmctl-local-with-goarch:
|
||||
APP_NAME=vmctl $(MAKE) app-local-with-goarch
|
||||
|
||||
vmctl-pure:
|
||||
APP_NAME=vmctl $(MAKE) app-local-pure
|
||||
473
app/vmctl/README.md
Normal file
473
app/vmctl/README.md
Normal file
@@ -0,0 +1,473 @@
|
||||
# vmctl - Victoria metrics command-line tool
|
||||
|
||||
Features:
|
||||
- [x] Prometheus: migrate data from Prometheus to VictoriaMetrics using snapshot API
|
||||
- [x] Thanos: migrate data from Thanos to VictoriaMetrics
|
||||
- [ ] ~~Prometheus: migrate data from Prometheus to VictoriaMetrics by query~~(discarded)
|
||||
- [x] InfluxDB: migrate data from InfluxDB to VictoriaMetrics
|
||||
- [ ] Storage Management: data re-balancing between nodes
|
||||
|
||||
# Table of contents
|
||||
|
||||
* [Articles](#articles)
|
||||
* [How to build](#how-to-build)
|
||||
* [Migrating data from InfluxDB 1.x](#migrating-data-from-influxdb-1x)
|
||||
* [Data mapping](#data-mapping)
|
||||
* [Configuration](#configuration)
|
||||
* [Filtering](#filtering)
|
||||
* [Migrating data from InfluxDB 2.x](#migrating-data-from-influxdb-2x)
|
||||
* [Migrating data from Prometheus](#migrating-data-from-prometheus)
|
||||
* [Data mapping](#data-mapping-1)
|
||||
* [Configuration](#configuration-1)
|
||||
* [Filtering](#filtering-1)
|
||||
* [Migrating data from Thanos](#migrating-data-from-thanos)
|
||||
* [Current data](#current-data)
|
||||
* [Historical data](#historical-data)
|
||||
* [Migrating data from VictoriaMetrics](#migrating-data-from-victoriametrics)
|
||||
* [Native protocol](#native-protocol)
|
||||
* [Tuning](#tuning)
|
||||
* [Influx mode](#influx-mode)
|
||||
* [Prometheus mode](#prometheus-mode)
|
||||
* [VictoriaMetrics importer](#victoriametrics-importer)
|
||||
* [Importer stats](#importer-stats)
|
||||
* [Significant figures](#significant-figures)
|
||||
* [Adding extra labels](#adding-extra-labels)
|
||||
|
||||
|
||||
## Articles
|
||||
|
||||
* [How to migrate data from Prometheus](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
|
||||
* [How to migrate data from Prometheus. Filtering and modifying time series](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-filtering-and-modifying-time-series-6d40cea4bf21)
|
||||
|
||||
## How to build
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmctl` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmctl` from the root folder of the repository.
|
||||
It builds `vmctl` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmctl-prod` from the root folder of the repository.
|
||||
It builds `vmctl-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmctl`. It builds `victoriametrics/vmctl:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmctl`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=scratch make package-vmctl
|
||||
```
|
||||
|
||||
### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmctl-arm` or `make vmctl-arm64` from the root folder of the repository.
|
||||
It builds `vmctl-arm` or `vmctl-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmctl-arm-prod` or `make vmctl-arm64-prod` from the root folder of the repository.
|
||||
It builds `vmctl-arm-prod` or `vmctl-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
## Migrating data from InfluxDB (1.x)
|
||||
|
||||
`vmctl` supports the `influx` mode to migrate data from InfluxDB to VictoriaMetrics time-series database.
|
||||
|
||||
See `./vmctl influx --help` for details and full list of flags.
|
||||
|
||||
To use migration tool please specify the InfluxDB address `--influx-addr`, the database `--influx-database` and VictoriaMetrics address `--vm-addr`.
|
||||
Flag `--vm-addr` for single-node VM is usually equal to `--httpListenAddr`, and for cluster version
|
||||
is equal to `--httpListenAddr` flag of VMInsert component. Please note, that vmctl performs initial readiness check for the given address
|
||||
by checking `/health` endpoint. For cluster version it is additionally required to specify the `--vm-account-id` flag.
|
||||
See more details for cluster version [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
As soon as required flags are provided and all endpoints are accessible, `vmctl` will start the InfluxDB scheme exploration.
|
||||
Basically, it just fetches all fields and timeseries from the provided database and builds up registry of all available timeseries.
|
||||
Then `vmctl` sends fetch requests for each timeseries to InfluxDB one by one and pass results to VM importer.
|
||||
VM importer then accumulates received samples in batches and sends import requests to VM.
|
||||
|
||||
The importing process example for local installation of InfluxDB(`http://localhost:8086`)
|
||||
and single-node VictoriaMetrics(`http://localhost:8428`):
|
||||
```
|
||||
./vmctl influx --influx-database benchmark
|
||||
InfluxDB import mode
|
||||
2020/01/18 20:47:11 Exploring scheme for database "benchmark"
|
||||
2020/01/18 20:47:11 fetching fields: command: "show field keys"; database: "benchmark"; retention: "autogen"
|
||||
2020/01/18 20:47:11 found 10 fields
|
||||
2020/01/18 20:47:11 fetching series: command: "show series "; database: "benchmark"; retention: "autogen"
|
||||
Found 40000 timeseries to import. Continue? [Y/n] y
|
||||
40000 / 40000 [-----------------------------------------------------------------------------------------------------------------------------------------------] 100.00% 21 p/s
|
||||
2020/01/18 21:19:00 Import finished!
|
||||
2020/01/18 21:19:00 VictoriaMetrics importer stats:
|
||||
idle duration: 13m51.461434876s;
|
||||
time spent while importing: 17m56.923899847s;
|
||||
total samples: 345600000;
|
||||
samples/s: 320914.04;
|
||||
total bytes: 5.9 GB;
|
||||
bytes/s: 5.4 MB;
|
||||
import requests: 40001;
|
||||
2020/01/18 21:19:00 Total time: 31m48.467044016s
|
||||
```
|
||||
|
||||
### Data mapping
|
||||
|
||||
Vmctl maps Influx data the same way as VictoriaMetrics does by using the following rules:
|
||||
|
||||
* `influx-database` arg is mapped into `db` label value unless `db` tag exists in the Influx line.
|
||||
* Field names are mapped to time series names prefixed with {measurement}{separator} value,
|
||||
where {separator} equals to _ by default.
|
||||
It can be changed with `--influx-measurement-field-separator` command-line flag.
|
||||
* Field values are mapped to time series values.
|
||||
* Tags are mapped to Prometheus labels format as-is.
|
||||
|
||||
For example, the following Influx line:
|
||||
```
|
||||
foo,tag1=value1,tag2=value2 field1=12,field2=40
|
||||
```
|
||||
|
||||
is converted into the following Prometheus format data points:
|
||||
```
|
||||
foo_field1{tag1="value1", tag2="value2"} 12
|
||||
foo_field2{tag1="value1", tag2="value2"} 40
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
The configuration flags should contain self-explanatory descriptions.
|
||||
|
||||
### Filtering
|
||||
|
||||
The filtering consists of two parts: timeseries and time.
|
||||
The first step of application is to select all available timeseries
|
||||
for given database and retention. User may specify additional filtering
|
||||
condition via `--influx-filter-series` flag. For example:
|
||||
```
|
||||
./vmctl influx --influx-database benchmark \
|
||||
--influx-filter-series "on benchmark from cpu where hostname='host_1703'"
|
||||
InfluxDB import mode
|
||||
2020/01/26 14:23:29 Exploring scheme for database "benchmark"
|
||||
2020/01/26 14:23:29 fetching fields: command: "show field keys"; database: "benchmark"; retention: "autogen"
|
||||
2020/01/26 14:23:29 found 12 fields
|
||||
2020/01/26 14:23:29 fetching series: command: "show series on benchmark from cpu where hostname='host_1703'"; database: "benchmark"; retention: "autogen"
|
||||
Found 10 timeseries to import. Continue? [Y/n]
|
||||
```
|
||||
The timeseries select query would be following:
|
||||
`fetching series: command: "show series on benchmark from cpu where hostname='host_1703'"; database: "benchmark"; retention: "autogen"`
|
||||
|
||||
The second step of filtering is a time filter and it applies when fetching the datapoints from Influx.
|
||||
Time filtering may be configured with two flags:
|
||||
* --influx-filter-time-start
|
||||
* --influx-filter-time-end
|
||||
Here's an example of importing timeseries for one day only:
|
||||
`./vmctl influx --influx-database benchmark --influx-filter-series "where hostname='host_1703'" --influx-filter-time-start "2020-01-01T10:07:00Z" --influx-filter-time-end "2020-01-01T15:07:00Z"`
|
||||
|
||||
Please see more about time filtering [here](https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration#filter-meta-queries-by-time).
|
||||
|
||||
## Migrating data from InfluxDB (2.x)
|
||||
|
||||
Migrating data from InfluxDB v2.x is not supported yet ([#32](https://github.com/VictoriaMetrics/vmctl/issues/32)).
|
||||
You may find useful a 3rd party solution for this - https://github.com/jonppe/influx_to_victoriametrics.
|
||||
|
||||
|
||||
## Migrating data from Prometheus
|
||||
|
||||
`vmctl` supports the `prometheus` mode for migrating data from Prometheus to VictoriaMetrics time-series database.
|
||||
Migration is based on reading Prometheus snapshot, which is basically a hard-link to Prometheus data files.
|
||||
|
||||
See `./vmctl prometheus --help` for details and full list of flags.
|
||||
|
||||
To use migration tool please specify the path to Prometheus snapshot `--prom-snapshot` and VictoriaMetrics address `--vm-addr`.
|
||||
More about Prometheus snapshots may be found [here](https://www.robustperception.io/taking-snapshots-of-prometheus-data).
|
||||
Flag `--vm-addr` for single-node VM is usually equal to `--httpListenAddr`, and for cluster version
|
||||
is equal to `--httpListenAddr` flag of VMInsert component. Please note, that vmctl performs initial readiness check for the given address
|
||||
by checking `/health` endpoint. For cluster version it is additionally required to specify the `--vm-account-id` flag.
|
||||
See more details for cluster version [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
As soon as required flags are provided and all endpoints are accessible, `vmctl` will start the Prometheus snapshot exploration.
|
||||
Basically, it just fetches all available blocks in provided snapshot and read the metadata. It also does initial filtering by time
|
||||
if flags `--prom-filter-time-start` or `--prom-filter-time-end` were set. The exploration procedure prints some stats from read blocks.
|
||||
Please note that stats are not taking into account timeseries or samples filtering. This will be done during importing process.
|
||||
|
||||
The importing process takes the snapshot blocks revealed from Explore procedure and processes them one by one
|
||||
accumulating timeseries and samples. Please note, that `vmctl` relies on responses from Influx on this stage,
|
||||
so ensure that Explore queries are executed without errors or limits. Please see this
|
||||
[issue](https://github.com/VictoriaMetrics/vmctl/issues/30) for details.
|
||||
The data processed in chunks and then sent to VM.
|
||||
|
||||
The importing process example for local installation of Prometheus
|
||||
and single-node VictoriaMetrics(`http://localhost:8428`):
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--vm-concurrency=1 \
|
||||
--vm-batch-size=200000 \
|
||||
--prom-concurrency=3
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 14;
|
||||
blocks skipped: 0;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1582409128139 (2020-02-22T22:05:28Z);
|
||||
samples: 32549106;
|
||||
series: 27289.
|
||||
Found 14 blocks to import. Continue? [Y/n] y
|
||||
14 / 14 [-------------------------------------------------------------------------------------------] 100.00% 0 p/s
|
||||
2020/02/23 15:50:03 Import finished!
|
||||
2020/02/23 15:50:03 VictoriaMetrics importer stats:
|
||||
idle duration: 6.152953029s;
|
||||
time spent while importing: 44.908522491s;
|
||||
total samples: 32549106;
|
||||
samples/s: 724786.84;
|
||||
total bytes: 669.1 MB;
|
||||
bytes/s: 14.9 MB;
|
||||
import requests: 323;
|
||||
import requests retries: 0;
|
||||
2020/02/23 15:50:03 Total time: 51.077451066s
|
||||
```
|
||||
|
||||
### Data mapping
|
||||
|
||||
VictoriaMetrics has very similar data model to Prometheus and supports [RemoteWrite integration](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
|
||||
So no data changes will be applied.
|
||||
|
||||
### Configuration
|
||||
|
||||
The configuration flags should contain self-explanatory descriptions.
|
||||
|
||||
### Filtering
|
||||
|
||||
The filtering consists of three parts: by timeseries and time.
|
||||
|
||||
Filtering by time may be configured via flags `--prom-filter-time-start` and `--prom-filter-time-end`
|
||||
in in RFC3339 format. This filter applied twice: to drop blocks out of range and to filter timeseries in blocks with
|
||||
overlapping time range.
|
||||
|
||||
Example of applying time filter:
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--prom-filter-time-start=2020-02-07T00:07:01Z \
|
||||
--prom-filter-time-end=2020-02-11T00:07:01Z
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 2;
|
||||
blocks skipped: 12;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1581328800000 (2020-02-10T10:00:00Z);
|
||||
samples: 1657698;
|
||||
series: 3930.
|
||||
Found 2 blocks to import. Continue? [Y/n] y
|
||||
```
|
||||
|
||||
Please notice, that total amount of blocks in provided snapshot is 14, but only 2 of them were in provided
|
||||
time range. So other 12 blocks were marked as `skipped`. The amount of samples and series is not taken into account,
|
||||
since this is heavy operation and will be done during import process.
|
||||
|
||||
|
||||
Filtering by timeseries is configured with following flags:
|
||||
* `--prom-filter-label` - the label name, e.g. `__name__` or `instance`;
|
||||
* `--prom-filter-label-value` - the regular expression to filter the label value. By default matches all `.*`
|
||||
|
||||
For example:
|
||||
```
|
||||
./vmctl prometheus --prom-snapshot=/path/to/snapshot \
|
||||
--prom-filter-label="__name__" \
|
||||
--prom-filter-label-value="promhttp.*" \
|
||||
--prom-filter-time-start=2020-02-07T00:07:01Z \
|
||||
--prom-filter-time-end=2020-02-11T00:07:01Z
|
||||
Prometheus import mode
|
||||
Prometheus snapshot stats:
|
||||
blocks found: 2;
|
||||
blocks skipped: 12;
|
||||
min time: 1581288163058 (2020-02-09T22:42:43Z);
|
||||
max time: 1581328800000 (2020-02-10T10:00:00Z);
|
||||
samples: 1657698;
|
||||
series: 3930.
|
||||
Found 2 blocks to import. Continue? [Y/n] y
|
||||
14 / 14 [------------------------------------------------------------------------------------------------------------------------------------------------------] 100.00% ? p/s
|
||||
2020/02/23 15:51:07 Import finished!
|
||||
2020/02/23 15:51:07 VictoriaMetrics importer stats:
|
||||
idle duration: 0s;
|
||||
time spent while importing: 37.415461ms;
|
||||
total samples: 10128;
|
||||
samples/s: 270690.24;
|
||||
total bytes: 195.2 kB;
|
||||
bytes/s: 5.2 MB;
|
||||
import requests: 2;
|
||||
import requests retries: 0;
|
||||
2020/02/23 15:51:07 Total time: 7.153158218s
|
||||
```
|
||||
|
||||
## Migrating data from Thanos
|
||||
|
||||
Thanos uses the same storage engine as Prometheus and the data layout on-disk should be the same. That means
|
||||
`vmctl` in mode `prometheus` may be used for Thanos historical data migration as well.
|
||||
These instructions may vary based on the details of your Thanos configuration.
|
||||
Please read carefully and verify as you go. We assume you're using Thanos Sidecar on your Prometheus pods,
|
||||
and that you have a separate Thanos Store installation.
|
||||
|
||||
### Current data
|
||||
|
||||
1. For now, keep your Thanos Sidecar and Thanos-related Prometheus configuration, but add this to also stream
|
||||
metrics to VictoriaMetrics:
|
||||
```
|
||||
remote_write:
|
||||
- url: http://victoria-metrics:8428/api/v1/write
|
||||
```
|
||||
2. Make sure VM is running, of course. Now check the logs to make sure that Prometheus is sending and VM is receiving.
|
||||
In Prometheus, make sure there are no errors. On the VM side, you should see messages like this:
|
||||
```
|
||||
2020-04-27T18:38:46.474Z info VictoriaMetrics/lib/storage/partition.go:207 creating a partition "2020_04" with smallPartsPath="/victoria-metrics-data/data/small/2020_04", bigPartsPath="/victoria-metrics-data/data/big/2020_04"
|
||||
2020-04-27T18:38:46.506Z info VictoriaMetrics/lib/storage/partition.go:222 partition "2020_04" has been created
|
||||
```
|
||||
3. Now just wait. Within two hours, Prometheus should finish its current data file and hand it off to Thanos Store for long term
|
||||
storage.
|
||||
|
||||
### Historical data
|
||||
|
||||
Let's assume your data is stored on S3 served by minio. You first need to copy that out to a local filesystem,
|
||||
then import it into VM using `vmctl` in `prometheus` mode.
|
||||
1. Copy data from minio.
|
||||
1. Run the `minio/mc` Docker container.
|
||||
1. `mc config host add minio http://minio:9000 accessKey secretKey`, substituting appropriate values for the last 3 items.
|
||||
1. `mc cp -r minio/prometheus thanos-data`
|
||||
1. Import using `vmctl`.
|
||||
1. Follow the [instructions](#how-to-build) to compile `vmctl` on your machine.
|
||||
1. Use [prometheus](#migrating-data-from-prometheus) mode to import data:
|
||||
```
|
||||
vmctl prometheus --prom-snapshot thanos-data --vm-addr http://victoria-metrics:8428
|
||||
```
|
||||
|
||||
## Migrating data from VictoriaMetrics
|
||||
|
||||
### Native protocol
|
||||
|
||||
The [native binary protocol](https://victoriametrics.github.io/#how-to-export-data-in-native-format)
|
||||
was introduced in [1.42.0 release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.42.0)
|
||||
and provides the most efficient way to migrate data between VM instances: single to single, cluster to cluster,
|
||||
single to cluster and vice versa. Please note that both instances (source and destination) should be of v1.42.0
|
||||
or higher.
|
||||
|
||||
See `./vmctl vm-native --help` for details and full list of flags.
|
||||
|
||||
In this mode `vmctl` acts as a proxy between two VM instances, where time series filtering is done by "source" (`src`)
|
||||
and processing is done by "destination" (`dst`). Because of that, `vmctl` doesn't actually know how much data will be
|
||||
processed and can't show the progress bar. It will show the current processing speed and total number of processed bytes:
|
||||
|
||||
```
|
||||
./vmctl vm-native --vm-native-src-addr=http://localhost:8528 \
|
||||
--vm-native-dst-addr=http://localhost:8428 \
|
||||
--vm-native-filter-match='{job="vmagent"}' \
|
||||
--vm-native-filter-time-start='2020-01-01T20:07:00Z'
|
||||
VictoriaMetrics Native import mode
|
||||
Initing export pipe from "http://localhost:8528" with filters:
|
||||
filter: match[]={job="vmagent"}
|
||||
Initing import process to "http://localhost:8428":
|
||||
Total: 336.75 KiB ↖ Speed: 454.46 KiB p/s
|
||||
2020/10/13 17:04:59 Total time: 952.143376ms
|
||||
```
|
||||
|
||||
Importing tips:
|
||||
1. Migrating all the metrics from one VM to another may collide with existing application metrics
|
||||
(prefixed with `vm_`) at destination and lead to confusion when using
|
||||
[official Grafana dashboards](https://grafana.com/orgs/victoriametrics/dashboards).
|
||||
To avoid such situation try to filter out VM process metrics via `--vm-native-filter-match` flag.
|
||||
2. Migration is a backfilling process, so it is recommended to read
|
||||
[Backfilling tips](https://github.com/VictoriaMetrics/VictoriaMetrics#backfilling) section.
|
||||
3. `vmctl` doesn't provide relabeling or other types of labels management in this mode.
|
||||
Instead, use [relabeling in VictoriaMetrics](https://github.com/VictoriaMetrics/vmctl/issues/4#issuecomment-683424375).
|
||||
|
||||
|
||||
## Tuning
|
||||
|
||||
### Influx mode
|
||||
|
||||
The flag `--influx-concurrency` controls how many concurrent requests may be sent to InfluxDB while fetching
|
||||
timeseries. Please set it wisely to avoid InfluxDB overwhelming.
|
||||
|
||||
The flag `--influx-chunk-size` controls the max amount of datapoints to return in single chunk from fetch requests.
|
||||
Please see more details [here](https://docs.influxdata.com/influxdb/v1.7/guides/querying_data/#chunking).
|
||||
The chunk size is used to control InfluxDB memory usage, so it won't OOM on processing large timeseries with
|
||||
billions of datapoints.
|
||||
|
||||
### Prometheus mode
|
||||
|
||||
The flag `--prom-concurrency` controls how many concurrent readers will be reading the blocks in snapshot.
|
||||
Since snapshots are just files on disk it would be hard to overwhelm the system. Please go with value equal
|
||||
to number of free CPU cores.
|
||||
|
||||
### VictoriaMetrics importer
|
||||
|
||||
The flag `--vm-concurrency` controls the number of concurrent workers that process the input from InfluxDB query results.
|
||||
Please note that each import request can load up to a single vCPU core on VictoriaMetrics. So try to set it according
|
||||
to allocated CPU resources of your VictoriMetrics installation.
|
||||
|
||||
The flag `--vm-batch-size` controls max amount of samples collected before sending the import request.
|
||||
For example, if `--influx-chunk-size=500` and `--vm-batch-size=2000` then importer will process not more
|
||||
than 4 chunks before sending the request.
|
||||
|
||||
### Importer stats
|
||||
|
||||
After successful import `vmctl` prints some statistics for details.
|
||||
The important numbers to watch are following:
|
||||
- `idle duration` - shows time that importer spent while waiting for data from InfluxDB/Prometheus
|
||||
to fill up `--vm-batch-size` batch size. Value shows total duration across all workers configured
|
||||
via `--vm-concurrency`. High value may be a sign of too slow InfluxDB/Prometheus fetches or too
|
||||
high `--vm-concurrency` value. Try to improve it by increasing `--<mode>-concurrency` value or
|
||||
decreasing `--vm-concurrency` value.
|
||||
- `import requests` - shows how many import requests were issued to VM server.
|
||||
The import request is issued once the batch size(`--vm-batch-size`) is full and ready to be sent.
|
||||
Please prefer big batch sizes (50k-500k) to improve performance.
|
||||
- `import requests retries` - shows number of unsuccessful import requests. Non-zero value may be
|
||||
a sign of network issues or VM being overloaded. See the logs during import for error messages.
|
||||
|
||||
### Silent mode
|
||||
|
||||
By default `vmctl` waits confirmation from user before starting the import. If this is unwanted
|
||||
behavior and no user interaction required - pass `-s` flag to enable "silence" mode:
|
||||
```
|
||||
-s Whether to run in silent mode. If set to true no confirmation prompts will appear. (default: false)
|
||||
```
|
||||
|
||||
### Significant figures
|
||||
|
||||
`vmctl` allows to limit the number of [significant figures](https://en.wikipedia.org/wiki/Significant_figures)
|
||||
before importing. For example, the average value for response size is `102.342305` bytes and it has 9 significant figures.
|
||||
If you ask a human to pronounce this value then with high probability value will be rounded to first 4 or 5 figures
|
||||
because the rest aren't really that important to mention. In most cases, such a high precision is too much.
|
||||
Moreover, such values may be just a result of [floating point arithmetic](https://en.wikipedia.org/wiki/Floating-point_arithmetic),
|
||||
create a [false precision](https://en.wikipedia.org/wiki/False_precision) and result into bad compression ratio
|
||||
according to [information theory](https://en.wikipedia.org/wiki/Information_theory).
|
||||
|
||||
`vmctl` provides the following flags for improving data compression:
|
||||
|
||||
* `--vm-round-digits` flag for rounding processed values to the given number of decimal digits after the point.
|
||||
For example, `--vm-round-digits=2` would round `1.2345` to `1.23`. By default the rounding is disabled.
|
||||
|
||||
* `--vm-significant-figures` flag for limiting the number of significant figures in processed values. It takes no effect if set
|
||||
to 0 (by default), but set `--vm-significant-figures=5` and `102.342305` will be rounded to `102.34`.
|
||||
|
||||
The most common case for using these flags is to improve data compression for time series storing aggregation
|
||||
results such as `average`, `rate`, etc.
|
||||
|
||||
### Adding extra labels
|
||||
|
||||
`vmctl` allows to add extra labels to all imported series. It can be achived with flag `--vm-extra-label label=value`.
|
||||
If multiple labels needs to be added, set flag for each label, for example, `--vm-extra-label label1=value1 --vm-extra-label label2=value2`.
|
||||
If timeseries already have label, that must be added with `--vm-extra-label` flag, flag has priority and will override label value from timeseries.
|
||||
|
||||
6
app/vmctl/deployment/Dockerfile
Normal file
6
app/vmctl/deployment/Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
ARG base_image
|
||||
FROM $base_image
|
||||
|
||||
ENTRYPOINT ["/vmctl-prod"]
|
||||
ARG src_binary
|
||||
COPY $src_binary ./vmctl-prod
|
||||
292
app/vmctl/flags.go
Normal file
292
app/vmctl/flags.go
Normal file
@@ -0,0 +1,292 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
globalSilent = "s"
|
||||
)
|
||||
|
||||
var (
|
||||
globalFlags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: globalSilent,
|
||||
Value: false,
|
||||
Usage: "Whether to run in silent mode. If set to true no confirmation prompts will appear.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
vmAddr = "vm-addr"
|
||||
vmUser = "vm-user"
|
||||
vmPassword = "vm-password"
|
||||
vmAccountID = "vm-account-id"
|
||||
vmConcurrency = "vm-concurrency"
|
||||
vmCompress = "vm-compress"
|
||||
vmBatchSize = "vm-batch-size"
|
||||
vmSignificantFigures = "vm-significant-figures"
|
||||
vmRoundDigits = "vm-round-digits"
|
||||
vmExtraLabel = "vm-extra-label"
|
||||
)
|
||||
|
||||
var (
|
||||
vmFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: vmAddr,
|
||||
Value: "http://localhost:8428",
|
||||
Usage: "VictoriaMetrics address to perform import requests. \n" +
|
||||
"Should be the same as --httpListenAddr value for single-node version or VMInsert component. \n" +
|
||||
"Please note, that `vmctl` performs initial readiness check for the given address by checking `/health` endpoint.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmAccountID,
|
||||
Usage: "AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). \n" +
|
||||
"It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer. \n" +
|
||||
"If projectID isn't set, then it equals to 0",
|
||||
},
|
||||
&cli.UintFlag{
|
||||
Name: vmConcurrency,
|
||||
Usage: "Number of workers concurrently performing import requests to VM",
|
||||
Value: 2,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmCompress,
|
||||
Value: true,
|
||||
Usage: "Whether to apply gzip compression to import requests",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmBatchSize,
|
||||
Value: 200e3,
|
||||
Usage: "How many samples importer collects before sending the import request to VM",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmSignificantFigures,
|
||||
Value: 0,
|
||||
Usage: "The number of significant figures to leave in metric values before importing. " +
|
||||
"See https://en.wikipedia.org/wiki/Significant_figures. Zero value saves all the significant figures. " +
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics. " +
|
||||
"See also --vm-round-digits option",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: vmRoundDigits,
|
||||
Value: 100,
|
||||
Usage: "Round metric values to the given number of decimal digits after the point. " +
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
influxAddr = "influx-addr"
|
||||
influxUser = "influx-user"
|
||||
influxPassword = "influx-password"
|
||||
influxDB = "influx-database"
|
||||
influxRetention = "influx-retention-policy"
|
||||
influxChunkSize = "influx-chunk-size"
|
||||
influxConcurrency = "influx-concurrency"
|
||||
influxFilterSeries = "influx-filter-series"
|
||||
influxFilterTimeStart = "influx-filter-time-start"
|
||||
influxFilterTimeEnd = "influx-filter-time-end"
|
||||
influxMeasurementFieldSeparator = "influx-measurement-field-separator"
|
||||
)
|
||||
|
||||
var (
|
||||
influxFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: influxAddr,
|
||||
Value: "http://localhost:8086",
|
||||
Usage: "Influx server addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxUser,
|
||||
Usage: "Influx user",
|
||||
EnvVars: []string{"INFLUX_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxPassword,
|
||||
Usage: "Influx user password",
|
||||
EnvVars: []string{"INFLUX_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxDB,
|
||||
Usage: "Influx database",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxRetention,
|
||||
Usage: "Influx retention policy",
|
||||
Value: "autogen",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: influxChunkSize,
|
||||
Usage: "The chunkSize defines max amount of series to be returned in one chunk",
|
||||
Value: 10e3,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: influxConcurrency,
|
||||
Usage: "Number of concurrently running fetch queries to InfluxDB",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterSeries,
|
||||
Usage: "Influx filter expression to select series. E.g. \"from cpu where arch='x86' AND hostname='host_2753'\".\n" +
|
||||
"See for details https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration#show-series",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterTimeStart,
|
||||
Usage: "The time filter to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxFilterTimeEnd,
|
||||
Usage: "The time filter to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: influxMeasurementFieldSeparator,
|
||||
Usage: "The {separator} symbol used to concatenate {measurement} and {field} names into series name {measurement}{separator}{field}.",
|
||||
Value: "_",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
promSnapshot = "prom-snapshot"
|
||||
promConcurrency = "prom-concurrency"
|
||||
promFilterTimeStart = "prom-filter-time-start"
|
||||
promFilterTimeEnd = "prom-filter-time-end"
|
||||
promFilterLabel = "prom-filter-label"
|
||||
promFilterLabelValue = "prom-filter-label-value"
|
||||
)
|
||||
|
||||
var (
|
||||
promFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: promSnapshot,
|
||||
Usage: "Path to Prometheus snapshot. Pls see for details https://www.robustperception.io/taking-snapshots-of-prometheus-data",
|
||||
Required: true,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: promConcurrency,
|
||||
Usage: "Number of concurrently running snapshot readers",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterTimeStart,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterTimeEnd,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: promFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", promFilterLabel),
|
||||
Value: ".*",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
vmNativeFilterMatch = "vm-native-filter-match"
|
||||
vmNativeFilterTimeStart = "vm-native-filter-time-start"
|
||||
vmNativeFilterTimeEnd = "vm-native-filter-time-end"
|
||||
|
||||
vmNativeSrcAddr = "vm-native-src-addr"
|
||||
vmNativeSrcUser = "vm-native-src-user"
|
||||
vmNativeSrcPassword = "vm-native-src-password"
|
||||
|
||||
vmNativeDstAddr = "vm-native-dst-addr"
|
||||
vmNativeDstUser = "vm-native-dst-user"
|
||||
vmNativeDstPassword = "vm-native-dst-password"
|
||||
)
|
||||
|
||||
var (
|
||||
vmNativeFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterMatch,
|
||||
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
|
||||
"match all series with \"instance\" label different to \"localhost\".\n" +
|
||||
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
|
||||
Value: `{__name__!=""}`,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterTimeStart,
|
||||
Usage: "The time filter may contain either unix timestamp in seconds or RFC3339 values. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeFilterTimeEnd,
|
||||
Usage: "The time filter may contain either unix timestamp in seconds or RFC3339 values. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcAddr,
|
||||
Usage: "VictoriaMetrics address to perform export from. \n" +
|
||||
" Should be the same as --httpListenAddr value for single-node version or VMSelect component." +
|
||||
" If exporting from cluster version - include the tenet token in address.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_SRC_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_SRC_PASSWORD"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstAddr,
|
||||
Usage: "VictoriaMetrics address to perform import to. \n" +
|
||||
" Should be the same as --httpListenAddr value for single-node version or VMInsert component." +
|
||||
" If importing into cluster version - include the tenet token in address.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstUser,
|
||||
Usage: "VictoriaMetrics username for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_DST_USERNAME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstPassword,
|
||||
Usage: "VictoriaMetrics password for basic auth",
|
||||
EnvVars: []string{"VM_NATIVE_DST_PASSWORD"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func mergeFlags(flags ...[]cli.Flag) []cli.Flag {
|
||||
var result []cli.Flag
|
||||
for _, f := range flags {
|
||||
result = append(result, f...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
146
app/vmctl/influx.go
Normal file
146
app/vmctl/influx.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
)
|
||||
|
||||
type influxProcessor struct {
|
||||
ic *influx.Client
|
||||
im *vm.Importer
|
||||
cc int
|
||||
separator string
|
||||
}
|
||||
|
||||
func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator string) *influxProcessor {
|
||||
if cc < 1 {
|
||||
cc = 1
|
||||
}
|
||||
return &influxProcessor{
|
||||
ic: ic,
|
||||
im: im,
|
||||
cc: cc,
|
||||
separator: separator,
|
||||
}
|
||||
}
|
||||
|
||||
func (ip *influxProcessor) run(silent bool) error {
|
||||
series, err := ip.ic.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore query failed: %s", err)
|
||||
}
|
||||
if len(series) < 1 {
|
||||
return fmt.Errorf("found no timeseries to import")
|
||||
}
|
||||
|
||||
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
|
||||
if !silent && !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
bar := pb.StartNew(len(series))
|
||||
seriesCh := make(chan *influx.Series)
|
||||
errCh := make(chan error)
|
||||
ip.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(ip.cc)
|
||||
for i := 0; i < ip.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for s := range seriesCh {
|
||||
if err := ip.do(s); err != nil {
|
||||
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
|
||||
return
|
||||
}
|
||||
bar.Increment()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for _, s := range series {
|
||||
select {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("influx error: %s", infErr)
|
||||
case vmErr := <-ip.im.Errors():
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
case seriesCh <- s:
|
||||
}
|
||||
}
|
||||
|
||||
close(seriesCh)
|
||||
wg.Wait()
|
||||
ip.im.Close()
|
||||
// drain import errors channel
|
||||
for vmErr := range ip.im.Errors() {
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
}
|
||||
bar.Finish()
|
||||
log.Println("Import finished!")
|
||||
log.Print(ip.im.Stats())
|
||||
return nil
|
||||
}
|
||||
|
||||
const dbLabel = "db"
|
||||
|
||||
func (ip *influxProcessor) do(s *influx.Series) error {
|
||||
cr, err := ip.ic.FetchDataPoints(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch datapoints: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = cr.Close()
|
||||
}()
|
||||
var name string
|
||||
if s.Measurement != "" {
|
||||
name = fmt.Sprintf("%s%s%s", s.Measurement, ip.separator, s.Field)
|
||||
} else {
|
||||
name = s.Field
|
||||
}
|
||||
|
||||
labels := make([]vm.LabelPair, len(s.LabelPairs))
|
||||
var containsDBLabel bool
|
||||
for i, lp := range s.LabelPairs {
|
||||
if lp.Name == dbLabel {
|
||||
containsDBLabel = true
|
||||
break
|
||||
}
|
||||
labels[i] = vm.LabelPair{
|
||||
Name: lp.Name,
|
||||
Value: lp.Value,
|
||||
}
|
||||
}
|
||||
if !containsDBLabel {
|
||||
labels = append(labels, vm.LabelPair{
|
||||
Name: dbLabel,
|
||||
Value: ip.ic.Database(),
|
||||
})
|
||||
}
|
||||
|
||||
for {
|
||||
time, values, err := cr.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
// skip empty results
|
||||
if len(time) < 1 {
|
||||
continue
|
||||
}
|
||||
ip.im.Input() <- &vm.TimeSeries{
|
||||
Name: name,
|
||||
LabelPairs: labels,
|
||||
Timestamps: time,
|
||||
Values: values,
|
||||
}
|
||||
}
|
||||
}
|
||||
362
app/vmctl/influx/influx.go
Normal file
362
app/vmctl/influx/influx.go
Normal file
@@ -0,0 +1,362 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
influx "github.com/influxdata/influxdb/client/v2"
|
||||
)
|
||||
|
||||
// Client represents a wrapper over
|
||||
// influx HTTP client
|
||||
type Client struct {
|
||||
influx.Client
|
||||
|
||||
database string
|
||||
retention string
|
||||
chunkSize int
|
||||
|
||||
filterSeries string
|
||||
filterTime string
|
||||
}
|
||||
|
||||
// Config contains fields required
|
||||
// for Client configuration
|
||||
type Config struct {
|
||||
Addr string
|
||||
Username string
|
||||
Password string
|
||||
Database string
|
||||
Retention string
|
||||
ChunkSize int
|
||||
|
||||
Filter Filter
|
||||
}
|
||||
|
||||
// Filter contains configuration for filtering
|
||||
// the timeseries
|
||||
type Filter struct {
|
||||
Series string
|
||||
TimeStart string
|
||||
TimeEnd string
|
||||
}
|
||||
|
||||
// Series holds the time series
|
||||
type Series struct {
|
||||
Measurement string
|
||||
Field string
|
||||
LabelPairs []LabelPair
|
||||
}
|
||||
|
||||
var valueEscaper = strings.NewReplacer(`\`, `\\`, `'`, `\'`)
|
||||
|
||||
func (s Series) fetchQuery(timeFilter string) string {
|
||||
f := &strings.Builder{}
|
||||
fmt.Fprintf(f, "select %q from %q", s.Field, s.Measurement)
|
||||
if len(s.LabelPairs) > 0 || len(timeFilter) > 0 {
|
||||
f.WriteString(" where")
|
||||
}
|
||||
for i, pair := range s.LabelPairs {
|
||||
pairV := valueEscaper.Replace(pair.Value)
|
||||
fmt.Fprintf(f, " %q='%s'", pair.Name, pairV)
|
||||
if i != len(s.LabelPairs)-1 {
|
||||
f.WriteString(" and")
|
||||
}
|
||||
}
|
||||
if len(timeFilter) > 0 {
|
||||
if len(s.LabelPairs) > 0 {
|
||||
f.WriteString(" and")
|
||||
}
|
||||
fmt.Fprintf(f, " %s", timeFilter)
|
||||
}
|
||||
return f.String()
|
||||
}
|
||||
|
||||
// LabelPair is the key-value record
|
||||
// of time series label
|
||||
type LabelPair struct {
|
||||
Name string
|
||||
Value string
|
||||
}
|
||||
|
||||
// NewClient creates and returns influx client
|
||||
// configured with passed Config
|
||||
func NewClient(cfg Config) (*Client, error) {
|
||||
c := influx.HTTPConfig{
|
||||
Addr: cfg.Addr,
|
||||
Username: cfg.Username,
|
||||
Password: cfg.Password,
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
hc, err := influx.NewHTTPClient(c)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to establish conn: %s", err)
|
||||
}
|
||||
if _, _, err := hc.Ping(time.Second); err != nil {
|
||||
return nil, fmt.Errorf("ping failed: %s", err)
|
||||
}
|
||||
|
||||
chunkSize := cfg.ChunkSize
|
||||
if chunkSize < 1 {
|
||||
chunkSize = 10e3
|
||||
}
|
||||
|
||||
client := &Client{
|
||||
Client: hc,
|
||||
database: cfg.Database,
|
||||
retention: cfg.Retention,
|
||||
chunkSize: chunkSize,
|
||||
filterTime: timeFilter(cfg.Filter.TimeStart, cfg.Filter.TimeEnd),
|
||||
filterSeries: cfg.Filter.Series,
|
||||
}
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// Database returns database name
|
||||
func (c Client) Database() string {
|
||||
return c.database
|
||||
}
|
||||
|
||||
func timeFilter(start, end string) string {
|
||||
if start == "" && end == "" {
|
||||
return ""
|
||||
}
|
||||
var tf string
|
||||
if start != "" {
|
||||
tf = fmt.Sprintf("time >= '%s'", start)
|
||||
}
|
||||
if end != "" {
|
||||
if tf != "" {
|
||||
tf += " and "
|
||||
}
|
||||
tf += fmt.Sprintf("time <= '%s'", end)
|
||||
}
|
||||
return tf
|
||||
}
|
||||
|
||||
// Explore checks the existing data schema in influx
|
||||
// by checking available fields and series,
|
||||
// which unique combination represents all possible
|
||||
// time series existing in database.
|
||||
// The explore required to reduce the load on influx
|
||||
// by querying field of the exact time series at once,
|
||||
// instead of fetching all of the values over and over.
|
||||
//
|
||||
// May contain non-existing time series.
|
||||
func (c *Client) Explore() ([]*Series, error) {
|
||||
log.Printf("Exploring scheme for database %q", c.database)
|
||||
mFields, err := c.fieldsByMeasurement()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get field keys: %s", err)
|
||||
}
|
||||
|
||||
series, err := c.getSeries()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get series: %s", err)
|
||||
}
|
||||
|
||||
var iSeries []*Series
|
||||
for _, s := range series {
|
||||
fields, ok := mFields[s.Measurement]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find field keys for measurement %q", s.Measurement)
|
||||
}
|
||||
for _, field := range fields {
|
||||
is := &Series{
|
||||
Measurement: s.Measurement,
|
||||
Field: field,
|
||||
LabelPairs: s.LabelPairs,
|
||||
}
|
||||
iSeries = append(iSeries, is)
|
||||
}
|
||||
}
|
||||
return iSeries, nil
|
||||
}
|
||||
|
||||
// ChunkedResponse is a wrapper over influx.ChunkedResponse.
|
||||
// Used for better memory usage control while iterating
|
||||
// over huge time series.
|
||||
type ChunkedResponse struct {
|
||||
cr *influx.ChunkedResponse
|
||||
iq influx.Query
|
||||
field string
|
||||
}
|
||||
|
||||
// Close closes cr.
|
||||
func (cr *ChunkedResponse) Close() error {
|
||||
return cr.cr.Close()
|
||||
}
|
||||
|
||||
// Next reads the next part/chunk of time series.
|
||||
// Returns io.EOF when time series was read entirely.
|
||||
func (cr *ChunkedResponse) Next() ([]int64, []float64, error) {
|
||||
resp, err := cr.cr.NextResponse()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if resp.Error() != nil {
|
||||
return nil, nil, fmt.Errorf("response error for %s: %s", cr.iq.Command, resp.Error())
|
||||
}
|
||||
if len(resp.Results) != 1 {
|
||||
return nil, nil, fmt.Errorf("unexpected number of results in response: %d", len(resp.Results))
|
||||
}
|
||||
results, err := parseResult(resp.Results[0])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if len(results) < 1 {
|
||||
return nil, nil, nil
|
||||
}
|
||||
r := results[0]
|
||||
|
||||
const key = "time"
|
||||
timestamps, ok := r.values[key]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain field %q", key)
|
||||
}
|
||||
|
||||
fieldValues, ok := r.values[cr.field]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain filed %q", cr.field)
|
||||
}
|
||||
values := make([]float64, len(fieldValues))
|
||||
for i, fv := range fieldValues {
|
||||
v, err := toFloat64(fv)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to convert value %q.%v to float64: %s",
|
||||
cr.field, v, err)
|
||||
}
|
||||
values[i] = v
|
||||
}
|
||||
|
||||
ts := make([]int64, len(results[0].values[key]))
|
||||
for i, v := range timestamps {
|
||||
t, err := parseDate(v.(string))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
ts[i] = t
|
||||
}
|
||||
return ts, values, nil
|
||||
}
|
||||
|
||||
// FetchDataPoints performs SELECT request to fetch
|
||||
// datapoints for particular field.
|
||||
func (c *Client) FetchDataPoints(s *Series) (*ChunkedResponse, error) {
|
||||
iq := influx.Query{
|
||||
Command: s.fetchQuery(c.filterTime),
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
Chunked: true,
|
||||
ChunkSize: 1e4,
|
||||
}
|
||||
cr, err := c.QueryAsChunk(iq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query %q err: %s", iq.Command, err)
|
||||
}
|
||||
return &ChunkedResponse{cr, iq, s.Field}, nil
|
||||
}
|
||||
|
||||
func (c *Client) fieldsByMeasurement() (map[string][]string, error) {
|
||||
q := influx.Query{
|
||||
Command: "show field keys",
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
}
|
||||
log.Printf("fetching fields: %s", stringify(q))
|
||||
qValues, err := c.do(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while executing query %q: %s", q.Command, err)
|
||||
}
|
||||
|
||||
var total int
|
||||
var skipped int
|
||||
const fKey = "fieldKey"
|
||||
const fType = "fieldType"
|
||||
result := make(map[string][]string, len(qValues))
|
||||
for _, qv := range qValues {
|
||||
types := qv.values[fType]
|
||||
fields := qv.values[fKey]
|
||||
values := make([]string, 0)
|
||||
for key, field := range fields {
|
||||
if types[key].(string) == "string" {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
values = append(values, field.(string))
|
||||
total++
|
||||
}
|
||||
result[qv.name] = values
|
||||
}
|
||||
|
||||
if skipped > 0 {
|
||||
log.Printf("found %d fields; skipped %d non-numeric fields", total, skipped)
|
||||
} else {
|
||||
log.Printf("found %d fields", total)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *Client) getSeries() ([]*Series, error) {
|
||||
com := "show series"
|
||||
if c.filterSeries != "" {
|
||||
com = fmt.Sprintf("%s %s", com, c.filterSeries)
|
||||
}
|
||||
q := influx.Query{
|
||||
Command: com,
|
||||
Database: c.database,
|
||||
RetentionPolicy: c.retention,
|
||||
Chunked: true,
|
||||
ChunkSize: c.chunkSize,
|
||||
}
|
||||
|
||||
log.Printf("fetching series: %s", stringify(q))
|
||||
cr, err := c.QueryAsChunk(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while executing query %q: %s", q.Command, err)
|
||||
}
|
||||
|
||||
const key = "key"
|
||||
var result []*Series
|
||||
for {
|
||||
resp, err := cr.NextResponse()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if resp.Error() != nil {
|
||||
return nil, fmt.Errorf("response error for query %q: %s", q.Command, resp.Error())
|
||||
}
|
||||
qValues, err := parseResult(resp.Results[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, qv := range qValues {
|
||||
for _, v := range qv.values[key] {
|
||||
s := &Series{}
|
||||
if err := s.unmarshal(v.(string)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = append(result, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Printf("found %d series", len(result))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *Client) do(q influx.Query) ([]queryValues, error) {
|
||||
res, err := c.Query(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query %q err: %s", q.Command, err)
|
||||
}
|
||||
if len(res.Results) < 1 {
|
||||
return nil, fmt.Errorf("exploration query %q returned 0 results", q.Command)
|
||||
}
|
||||
return parseResult(res.Results[0])
|
||||
}
|
||||
127
app/vmctl/influx/influx_test.go
Normal file
127
app/vmctl/influx/influx_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package influx
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestFetchQuery(t *testing.T) {
|
||||
testCases := []struct {
|
||||
s Series
|
||||
timeFilter string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `select "value" from "cpu" where "foo"='bar'`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
{
|
||||
Name: "baz",
|
||||
Value: "qux",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `select "value" from "cpu" where "foo"='bar' and "baz"='qux'`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "b'ar",
|
||||
},
|
||||
},
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where "foo"='b\'ar' and time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
LabelPairs: []LabelPair{
|
||||
{
|
||||
Name: "name",
|
||||
Value: `dev-mapper-centos\x2dswap.swap`,
|
||||
},
|
||||
{
|
||||
Name: "state",
|
||||
Value: "dev-mapp'er-c'en'tos",
|
||||
},
|
||||
},
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where "name"='dev-mapper-centos\\x2dswap.swap' and "state"='dev-mapp\'er-c\'en\'tos' and time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
},
|
||||
timeFilter: "time >= now()",
|
||||
expected: `select "value" from "cpu" where time >= now()`,
|
||||
},
|
||||
{
|
||||
s: Series{
|
||||
Measurement: "cpu",
|
||||
Field: "value",
|
||||
},
|
||||
expected: `select "value" from "cpu"`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
query := tc.s.fetchQuery(tc.timeFilter)
|
||||
if query != tc.expected {
|
||||
t.Fatalf("got: \n%s;\nexpected: \n%s", query, tc.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTimeFilter(t *testing.T) {
|
||||
testCases := []struct {
|
||||
start string
|
||||
end string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
start: "2020-01-01T20:07:00Z",
|
||||
end: "2020-01-01T21:07:00Z",
|
||||
expected: "time >= '2020-01-01T20:07:00Z' and time <= '2020-01-01T21:07:00Z'",
|
||||
},
|
||||
{
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
start: "2020-01-01T20:07:00Z",
|
||||
expected: "time >= '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
{
|
||||
end: "2020-01-01T21:07:00Z",
|
||||
expected: "time <= '2020-01-01T21:07:00Z'",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
f := timeFilter(tc.start, tc.end)
|
||||
if f != tc.expected {
|
||||
t.Fatalf("got: \n%q;\nexpected: \n%q", f, tc.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
191
app/vmctl/influx/parser.go
Normal file
191
app/vmctl/influx/parser.go
Normal file
@@ -0,0 +1,191 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
influx "github.com/influxdata/influxdb/client/v2"
|
||||
)
|
||||
|
||||
type queryValues struct {
|
||||
name string
|
||||
values map[string][]interface{}
|
||||
}
|
||||
|
||||
func parseResult(r influx.Result) ([]queryValues, error) {
|
||||
if len(r.Err) > 0 {
|
||||
return nil, fmt.Errorf("result error: %s", r.Err)
|
||||
}
|
||||
qValues := make([]queryValues, len(r.Series))
|
||||
for i, row := range r.Series {
|
||||
values := make(map[string][]interface{}, len(row.Values))
|
||||
for _, value := range row.Values {
|
||||
for idx, v := range value {
|
||||
key := row.Columns[idx]
|
||||
values[key] = append(values[key], v)
|
||||
}
|
||||
}
|
||||
qValues[i] = queryValues{
|
||||
name: row.Name,
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
return qValues, nil
|
||||
}
|
||||
|
||||
func toFloat64(v interface{}) (float64, error) {
|
||||
switch i := v.(type) {
|
||||
case json.Number:
|
||||
return i.Float64()
|
||||
case float64:
|
||||
return i, nil
|
||||
case float32:
|
||||
return float64(i), nil
|
||||
case int64:
|
||||
return float64(i), nil
|
||||
case int32:
|
||||
return float64(i), nil
|
||||
case int:
|
||||
return float64(i), nil
|
||||
case uint64:
|
||||
return float64(i), nil
|
||||
case uint32:
|
||||
return float64(i), nil
|
||||
case uint:
|
||||
return float64(i), nil
|
||||
case string:
|
||||
return strconv.ParseFloat(i, 64)
|
||||
default:
|
||||
return 0, fmt.Errorf("unexpected value type %v", i)
|
||||
}
|
||||
}
|
||||
|
||||
func parseDate(dateStr string) (int64, error) {
|
||||
startTime, err := time.Parse(time.RFC3339, dateStr)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse %q: %s", dateStr, err)
|
||||
}
|
||||
return startTime.UnixNano() / 1e6, nil
|
||||
}
|
||||
|
||||
func stringify(q influx.Query) string {
|
||||
return fmt.Sprintf("command: %q; database: %q; retention: %q",
|
||||
q.Command, q.Database, q.RetentionPolicy)
|
||||
}
|
||||
|
||||
func (s *Series) unmarshal(v string) error {
|
||||
noEscapeChars := strings.IndexByte(v, '\\') < 0
|
||||
n := nextUnescapedChar(v, ',', noEscapeChars)
|
||||
if n < 0 {
|
||||
s.Measurement = unescapeTagValue(v, noEscapeChars)
|
||||
return nil
|
||||
}
|
||||
s.Measurement = unescapeTagValue(v[:n], noEscapeChars)
|
||||
var err error
|
||||
s.LabelPairs, err = unmarshalTags(v[n+1:], noEscapeChars)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to unmarhsal tags: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalTags(s string, noEscapeChars bool) ([]LabelPair, error) {
|
||||
var result []LabelPair
|
||||
for {
|
||||
lp := LabelPair{}
|
||||
n := nextUnescapedChar(s, ',', noEscapeChars)
|
||||
if n < 0 {
|
||||
if err := lp.unmarshal(s, noEscapeChars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(lp.Name) == 0 || len(lp.Value) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
result = append(result, lp)
|
||||
return result, nil
|
||||
}
|
||||
if err := lp.unmarshal(s[:n], noEscapeChars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s = s[n+1:]
|
||||
if len(lp.Name) == 0 || len(lp.Value) == 0 {
|
||||
continue
|
||||
}
|
||||
result = append(result, lp)
|
||||
}
|
||||
}
|
||||
|
||||
func (lp *LabelPair) unmarshal(s string, noEscapeChars bool) error {
|
||||
n := nextUnescapedChar(s, '=', noEscapeChars)
|
||||
if n < 0 {
|
||||
return fmt.Errorf("missing tag value for %q", s)
|
||||
}
|
||||
lp.Name = unescapeTagValue(s[:n], noEscapeChars)
|
||||
lp.Value = unescapeTagValue(s[n+1:], noEscapeChars)
|
||||
return nil
|
||||
}
|
||||
|
||||
func unescapeTagValue(s string, noEscapeChars bool) string {
|
||||
if noEscapeChars {
|
||||
// Fast path - no escape chars.
|
||||
return s
|
||||
}
|
||||
n := strings.IndexByte(s, '\\')
|
||||
if n < 0 {
|
||||
return s
|
||||
}
|
||||
|
||||
// Slow path. Remove escape chars.
|
||||
dst := make([]byte, 0, len(s))
|
||||
for {
|
||||
dst = append(dst, s[:n]...)
|
||||
s = s[n+1:]
|
||||
if len(s) == 0 {
|
||||
return string(append(dst, '\\'))
|
||||
}
|
||||
ch := s[0]
|
||||
if ch != ' ' && ch != ',' && ch != '=' && ch != '\\' {
|
||||
dst = append(dst, '\\')
|
||||
}
|
||||
dst = append(dst, ch)
|
||||
s = s[1:]
|
||||
n = strings.IndexByte(s, '\\')
|
||||
if n < 0 {
|
||||
return string(append(dst, s...))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func nextUnescapedChar(s string, ch byte, noEscapeChars bool) int {
|
||||
if noEscapeChars {
|
||||
// Fast path: just search for ch in s, since s has no escape chars.
|
||||
return strings.IndexByte(s, ch)
|
||||
}
|
||||
|
||||
sOrig := s
|
||||
again:
|
||||
n := strings.IndexByte(s, ch)
|
||||
if n < 0 {
|
||||
return -1
|
||||
}
|
||||
if n == 0 {
|
||||
return len(sOrig) - len(s) + n
|
||||
}
|
||||
if s[n-1] != '\\' {
|
||||
return len(sOrig) - len(s) + n
|
||||
}
|
||||
nOrig := n
|
||||
slashes := 0
|
||||
for n > 0 && s[n-1] == '\\' {
|
||||
slashes++
|
||||
n--
|
||||
}
|
||||
if slashes&1 == 0 {
|
||||
return len(sOrig) - len(s) + nOrig
|
||||
}
|
||||
s = s[nOrig+1:]
|
||||
goto again
|
||||
}
|
||||
60
app/vmctl/influx/parser_test.go
Normal file
60
app/vmctl/influx/parser_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package influx
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSeries_Unmarshal(t *testing.T) {
|
||||
tag := func(name, value string) LabelPair {
|
||||
return LabelPair{
|
||||
Name: name,
|
||||
Value: value,
|
||||
}
|
||||
}
|
||||
series := func(measurement string, lp ...LabelPair) Series {
|
||||
return Series{
|
||||
Measurement: measurement,
|
||||
LabelPairs: lp,
|
||||
}
|
||||
}
|
||||
testCases := []struct {
|
||||
got string
|
||||
want Series
|
||||
}{
|
||||
{
|
||||
got: "cpu",
|
||||
want: series("cpu"),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=localhost",
|
||||
want: series("cpu", tag("host", "localhost")),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=localhost,instance=instance",
|
||||
want: series("cpu", tag("host", "localhost"), tag("instance", "instance")),
|
||||
},
|
||||
{
|
||||
got: `fo\,bar\=baz,x\=\b=\\a\,\=\q\ `,
|
||||
want: series("fo,bar=baz", tag(`x=\b`, `\a,=\q `)),
|
||||
},
|
||||
{
|
||||
got: "cpu,host=192.168.0.1,instance=fe80::fdc8:5e36:c2c6:baac%utun1",
|
||||
want: series("cpu", tag("host", "192.168.0.1"), tag("instance", "fe80::fdc8:5e36:c2c6:baac%utun1")),
|
||||
},
|
||||
{
|
||||
got: `cpu,db=db1,host=localhost,server=host\=localhost\ user\=user\ `,
|
||||
want: series("cpu", tag("db", "db1"),
|
||||
tag("host", "localhost"), tag("server", "host=localhost user=user ")),
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
s := Series{}
|
||||
if err := s.unmarshal(tc.got); err != nil {
|
||||
t.Fatalf("%q: unmarshal err: %s", tc.got, err)
|
||||
}
|
||||
if !reflect.DeepEqual(s, tc.want) {
|
||||
t.Fatalf("%q: expected\n%#v\nto be equal\n%#v", tc.got, s, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
159
app/vmctl/main.go
Normal file
159
app/vmctl/main.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
func main() {
|
||||
start := time.Now()
|
||||
app := &cli.App{
|
||||
Name: "vmctl",
|
||||
Usage: "Victoria metrics command-line tool",
|
||||
Version: buildinfo.Version,
|
||||
Commands: []*cli.Command{
|
||||
{
|
||||
Name: "influx",
|
||||
Usage: "Migrate timeseries from InfluxDB",
|
||||
Flags: mergeFlags(globalFlags, influxFlags, vmFlags),
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Println("InfluxDB import mode")
|
||||
|
||||
iCfg := influx.Config{
|
||||
Addr: c.String(influxAddr),
|
||||
Username: c.String(influxUser),
|
||||
Password: c.String(influxPassword),
|
||||
Database: c.String(influxDB),
|
||||
Retention: c.String(influxRetention),
|
||||
Filter: influx.Filter{
|
||||
Series: c.String(influxFilterSeries),
|
||||
TimeStart: c.String(influxFilterTimeStart),
|
||||
TimeEnd: c.String(influxFilterTimeEnd),
|
||||
},
|
||||
ChunkSize: c.Int(influxChunkSize),
|
||||
}
|
||||
influxClient, err := influx.NewClient(iCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create influx client: %s", err)
|
||||
}
|
||||
|
||||
vmCfg := initConfigVM(c)
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
|
||||
processor := newInfluxProcessor(influxClient, importer,
|
||||
c.Int(influxConcurrency), c.String(influxMeasurementFieldSeparator))
|
||||
return processor.run(c.Bool(globalSilent))
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "prometheus",
|
||||
Usage: "Migrate timeseries from Prometheus",
|
||||
Flags: mergeFlags(globalFlags, promFlags, vmFlags),
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Println("Prometheus import mode")
|
||||
|
||||
vmCfg := initConfigVM(c)
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
|
||||
promCfg := prometheus.Config{
|
||||
Snapshot: c.String(promSnapshot),
|
||||
Filter: prometheus.Filter{
|
||||
TimeMin: c.String(promFilterTimeStart),
|
||||
TimeMax: c.String(promFilterTimeEnd),
|
||||
Label: c.String(promFilterLabel),
|
||||
LabelValue: c.String(promFilterLabelValue),
|
||||
},
|
||||
}
|
||||
cl, err := prometheus.NewClient(promCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create prometheus client: %s", err)
|
||||
}
|
||||
pp := prometheusProcessor{
|
||||
cl: cl,
|
||||
im: importer,
|
||||
cc: c.Int(promConcurrency),
|
||||
}
|
||||
return pp.run(c.Bool(globalSilent))
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "vm-native",
|
||||
Usage: "Migrate time series between VictoriaMetrics installations via native binary format",
|
||||
Flags: vmNativeFlags,
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Println("VictoriaMetrics Native import mode")
|
||||
|
||||
if c.String(vmNativeFilterMatch) == "" {
|
||||
return fmt.Errorf("flag %q can't be empty", vmNativeFilterMatch)
|
||||
}
|
||||
|
||||
p := vmNativeProcessor{
|
||||
filter: filter{
|
||||
match: c.String(vmNativeFilterMatch),
|
||||
timeStart: c.String(vmNativeFilterTimeStart),
|
||||
timeEnd: c.String(vmNativeFilterTimeEnd),
|
||||
},
|
||||
src: &vmNativeClient{
|
||||
addr: strings.Trim(c.String(vmNativeSrcAddr), "/"),
|
||||
user: c.String(vmNativeSrcUser),
|
||||
password: c.String(vmNativeSrcPassword),
|
||||
},
|
||||
dst: &vmNativeClient{
|
||||
addr: strings.Trim(c.String(vmNativeDstAddr), "/"),
|
||||
user: c.String(vmNativeDstUser),
|
||||
password: c.String(vmNativeDstPassword),
|
||||
extraLabels: c.StringSlice(vmExtraLabel),
|
||||
},
|
||||
}
|
||||
return p.run()
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-c
|
||||
fmt.Println("\r- Execution cancelled")
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
err := app.Run(os.Args)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
}
|
||||
|
||||
func initConfigVM(c *cli.Context) vm.Config {
|
||||
return vm.Config{
|
||||
Addr: c.String(vmAddr),
|
||||
User: c.String(vmUser),
|
||||
Password: c.String(vmPassword),
|
||||
Concurrency: uint8(c.Int(vmConcurrency)),
|
||||
Compress: c.Bool(vmCompress),
|
||||
AccountID: c.String(vmAccountID),
|
||||
BatchSize: c.Int(vmBatchSize),
|
||||
SignificantFigures: c.Int(vmSignificantFigures),
|
||||
RoundDigits: c.Int(vmRoundDigits),
|
||||
ExtraLabels: c.StringSlice(vmExtraLabel),
|
||||
}
|
||||
}
|
||||
131
app/vmctl/prometheus.go
Normal file
131
app/vmctl/prometheus.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
)
|
||||
|
||||
type prometheusProcessor struct {
|
||||
// prometheus client fetches and reads
|
||||
// snapshot blocks
|
||||
cl *prometheus.Client
|
||||
// importer performs import requests
|
||||
// for timeseries data returned from
|
||||
// snapshot blocks
|
||||
im *vm.Importer
|
||||
// cc stands for concurrency
|
||||
// and defines number of concurrently
|
||||
// running snapshot block readers
|
||||
cc int
|
||||
}
|
||||
|
||||
func (pp *prometheusProcessor) run(silent bool) error {
|
||||
blocks, err := pp.cl.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore failed: %s", err)
|
||||
}
|
||||
if len(blocks) < 1 {
|
||||
return fmt.Errorf("found no blocks to import")
|
||||
}
|
||||
question := fmt.Sprintf("Found %d blocks to import. Continue?", len(blocks))
|
||||
if !silent && !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
bar := pb.StartNew(len(blocks))
|
||||
blockReadersCh := make(chan tsdb.BlockReader)
|
||||
errCh := make(chan error, pp.cc)
|
||||
pp.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(pp.cc)
|
||||
for i := 0; i < pp.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for br := range blockReadersCh {
|
||||
if err := pp.do(br); err != nil {
|
||||
errCh <- fmt.Errorf("read failed for block %q: %s", br.Meta().ULID, err)
|
||||
return
|
||||
}
|
||||
bar.Increment()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for _, br := range blocks {
|
||||
select {
|
||||
case promErr := <-errCh:
|
||||
close(blockReadersCh)
|
||||
return fmt.Errorf("prometheus error: %s", promErr)
|
||||
case vmErr := <-pp.im.Errors():
|
||||
close(blockReadersCh)
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
case blockReadersCh <- br:
|
||||
}
|
||||
}
|
||||
|
||||
close(blockReadersCh)
|
||||
wg.Wait()
|
||||
// wait for all buffers to flush
|
||||
pp.im.Close()
|
||||
// drain import errors channel
|
||||
for vmErr := range pp.im.Errors() {
|
||||
return fmt.Errorf("Import process failed: \n%s", wrapErr(vmErr))
|
||||
}
|
||||
bar.Finish()
|
||||
log.Println("Import finished!")
|
||||
log.Print(pp.im.Stats())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
|
||||
ss, err := pp.cl.Read(b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read block: %s", err)
|
||||
}
|
||||
for ss.Next() {
|
||||
var name string
|
||||
var labels []vm.LabelPair
|
||||
series := ss.At()
|
||||
|
||||
for _, label := range series.Labels() {
|
||||
if label.Name == "__name__" {
|
||||
name = label.Value
|
||||
continue
|
||||
}
|
||||
labels = append(labels, vm.LabelPair{
|
||||
Name: label.Name,
|
||||
Value: label.Value,
|
||||
})
|
||||
}
|
||||
if name == "" {
|
||||
return fmt.Errorf("failed to find `__name__` label in labelset for block %v", b.Meta().ULID)
|
||||
}
|
||||
|
||||
var timestamps []int64
|
||||
var values []float64
|
||||
it := series.Iterator()
|
||||
for it.Next() {
|
||||
t, v := it.At()
|
||||
timestamps = append(timestamps, t)
|
||||
values = append(values, v)
|
||||
}
|
||||
if err := it.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
pp.im.Input() <- &vm.TimeSeries{
|
||||
Name: name,
|
||||
LabelPairs: labels,
|
||||
Timestamps: timestamps,
|
||||
Values: values,
|
||||
}
|
||||
}
|
||||
return ss.Err()
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user