mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-07 19:06:17 +03:00
Compare commits
901 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
73dfb030dd | ||
|
|
52b5498165 | ||
|
|
b6dda0fefe | ||
|
|
d9b3a92348 | ||
|
|
1f28b46ae9 | ||
|
|
4f7f750850 | ||
|
|
0645688f32 | ||
|
|
7cbda6796c | ||
|
|
1531d757ea | ||
|
|
c605d64a95 | ||
|
|
63b1cab454 | ||
|
|
4a1d29126c | ||
|
|
16df18ec14 | ||
|
|
e0e16a2d36 | ||
|
|
2ce02a7fe6 | ||
|
|
39ba4fc1c4 | ||
|
|
278278af95 | ||
|
|
d330c7e6fc | ||
|
|
3cbc0975f6 | ||
|
|
09b403d38a | ||
|
|
664db964ca | ||
|
|
60bdbdaa70 | ||
|
|
3b2dc2b098 | ||
|
|
7b15834cbe | ||
|
|
f68d93cca2 | ||
|
|
242050ba94 | ||
|
|
3a65f4a733 | ||
|
|
39c1b0f8d1 | ||
|
|
686bf6c5ad | ||
|
|
478258bc4d | ||
|
|
c19fc52676 | ||
|
|
1c47acda11 | ||
|
|
2613110f75 | ||
|
|
616175b1ce | ||
|
|
318a87c36f | ||
|
|
160453b86c | ||
|
|
b7fe7b801c | ||
|
|
f0fad01e8a | ||
|
|
5a7159ab2e | ||
|
|
ad2d079ba5 | ||
|
|
193a9c3328 | ||
|
|
410ae99c2e | ||
|
|
1e4a9a8dfe | ||
|
|
9855b38da2 | ||
|
|
5d22c36904 | ||
|
|
73812c71a5 | ||
|
|
adc5635a07 | ||
|
|
bb7a295146 | ||
|
|
a55d3f6882 | ||
|
|
7ea2531db0 | ||
|
|
da037cafc5 | ||
|
|
86424e079e | ||
|
|
2ec17bed2c | ||
|
|
0ccb7f51dd | ||
|
|
2caf0b05c6 | ||
|
|
51196739af | ||
|
|
c8799a5d97 | ||
|
|
060a0cdbf4 | ||
|
|
2e494e2375 | ||
|
|
9eb1abdefe | ||
|
|
4e0345a5ef | ||
|
|
78b23c9a83 | ||
|
|
491831df49 | ||
|
|
b9bb64ce55 | ||
|
|
7db647e924 | ||
|
|
83a1c2484c | ||
|
|
11eb94d3bc | ||
|
|
ff3c90d305 | ||
|
|
fc42617ecd | ||
|
|
887555669e | ||
|
|
5a02bc56fb | ||
|
|
3c0470f91e | ||
|
|
74155afb71 | ||
|
|
185894fc5a | ||
|
|
39530903bf | ||
|
|
d906e83e5e | ||
|
|
ec3943d14a | ||
|
|
1db9b78b88 | ||
|
|
80946f06c2 | ||
|
|
6601fa3e9d | ||
|
|
92a549bccb | ||
|
|
23595465b8 | ||
|
|
8f43f496d7 | ||
|
|
09268d41ed | ||
|
|
1b288e0a05 | ||
|
|
24eeb8d9af | ||
|
|
8225a48b56 | ||
|
|
fa3a17938e | ||
|
|
c0cb3e4f98 | ||
|
|
cae87da4bb | ||
|
|
5b8450fc1b | ||
|
|
d71a2605d1 | ||
|
|
01520d3e5d | ||
|
|
8f4de6fa47 | ||
|
|
4e71003620 | ||
|
|
22e65402af | ||
|
|
aca256735c | ||
|
|
56b84140a9 | ||
|
|
2eb27ddb22 | ||
|
|
ddcc3b1e9d | ||
|
|
49b77ec01a | ||
|
|
4786f036de | ||
|
|
73b6c23271 | ||
|
|
baf456978d | ||
|
|
bf3b6732bd | ||
|
|
4ddfc67d54 | ||
|
|
f3b829125e | ||
|
|
5dde81259c | ||
|
|
51fbf58d89 | ||
|
|
3d955d1078 | ||
|
|
a5fb8b93a8 | ||
|
|
ee1da35071 | ||
|
|
3383f12a4b | ||
|
|
eb746a4dab | ||
|
|
29e059e49c | ||
|
|
6c322b4a00 | ||
|
|
9e99f2f5b3 | ||
|
|
03150c8973 | ||
|
|
b21a55febf | ||
|
|
5ee344824f | ||
|
|
5c955dd876 | ||
|
|
89a1c941c2 | ||
|
|
43ededf3a1 | ||
|
|
bc17f4828c | ||
|
|
368571be00 | ||
|
|
0660cc7128 | ||
|
|
cafad95790 | ||
|
|
2b34c01f6d | ||
|
|
4052c44ac1 | ||
|
|
041e188df8 | ||
|
|
4f3f9950d0 | ||
|
|
45a551df9c | ||
|
|
cf567badcf | ||
|
|
a80b0aebe8 | ||
|
|
752895d1ee | ||
|
|
68e31a6000 | ||
|
|
e2053baf32 | ||
|
|
73e22dcf81 | ||
|
|
77f76371d0 | ||
|
|
31e174977e | ||
|
|
05ab34f2c8 | ||
|
|
3140aa34de | ||
|
|
25759082f4 | ||
|
|
87b925afb2 | ||
|
|
de61a73c63 | ||
|
|
472fe3fd03 | ||
|
|
06aefcec81 | ||
|
|
413701454c | ||
|
|
2a4c48c59d | ||
|
|
52006149b2 | ||
|
|
2a2036160d | ||
|
|
3727251910 | ||
|
|
60d92894c5 | ||
|
|
df619bdff0 | ||
|
|
2a3b19e1d2 | ||
|
|
c0b852d50d | ||
|
|
9183a439c7 | ||
|
|
5eb163a08a | ||
|
|
fda1a54343 | ||
|
|
f341b7b3f8 | ||
|
|
bd6de6406a | ||
|
|
4b43c91f8c | ||
|
|
e0595af2bf | ||
|
|
88a4b9c313 | ||
|
|
5e87e03409 | ||
|
|
9f8209d593 | ||
|
|
550d5c7ea4 | ||
|
|
2a5b9ff782 | ||
|
|
809fbaeaac | ||
|
|
780abc3b3b | ||
|
|
5f487ed996 | ||
|
|
30425ca81a | ||
|
|
036a7b7365 | ||
|
|
344209e5e6 | ||
|
|
b15c5961ab | ||
|
|
2a8395be05 | ||
|
|
25f089de9d | ||
|
|
42bba64aa7 | ||
|
|
e1211a1187 | ||
|
|
ca54e58c1f | ||
|
|
90b876cd1e | ||
|
|
47e16594dd | ||
|
|
174a70369c | ||
|
|
81f28f0f1f | ||
|
|
51466ce379 | ||
|
|
b9ab07ced9 | ||
|
|
1fe87691ec | ||
|
|
7849545e78 | ||
|
|
ddb8883817 | ||
|
|
0ad6010c91 | ||
|
|
b7cce552da | ||
|
|
0e1e0b036a | ||
|
|
9586dabd94 | ||
|
|
05d44525b7 | ||
|
|
b5d18c0d28 | ||
|
|
28975067c6 | ||
|
|
a3eebf118e | ||
|
|
244c18fa38 | ||
|
|
01fc228fb0 | ||
|
|
ee80e71d17 | ||
|
|
4770377fb3 | ||
|
|
44aad84a53 | ||
|
|
608d87273d | ||
|
|
7a65329e65 | ||
|
|
37a7627254 | ||
|
|
a1601929ec | ||
|
|
4b2cc1b32c | ||
|
|
74eea53dee | ||
|
|
593c151831 | ||
|
|
4725549cb2 | ||
|
|
29a692f278 | ||
|
|
d87a700528 | ||
|
|
8249451dbb | ||
|
|
13668fc935 | ||
|
|
052160dcdc | ||
|
|
a265da4f53 | ||
|
|
5074cc672a | ||
|
|
c7bcf750c2 | ||
|
|
59102db4cf | ||
|
|
8a6acce7d3 | ||
|
|
a8d1497024 | ||
|
|
33c6cc2530 | ||
|
|
19b189e9b7 | ||
|
|
38fc55976e | ||
|
|
55b5276b70 | ||
|
|
f3af8331ec | ||
|
|
de0fe02f6e | ||
|
|
edb45d7fc1 | ||
|
|
f638496298 | ||
|
|
cc0427897c | ||
|
|
06b721dd07 | ||
|
|
42087518ba | ||
|
|
02b714c110 | ||
|
|
dd200409d9 | ||
|
|
27b958ba8b | ||
|
|
ffdf430be0 | ||
|
|
cddfc4d3f8 | ||
|
|
4d00107b92 | ||
|
|
d577657fb7 | ||
|
|
59c350d0d2 | ||
|
|
5e5fc66e3b | ||
|
|
4a49577028 | ||
|
|
ec45f1bc5f | ||
|
|
0945a03843 | ||
|
|
ff72ca14b9 | ||
|
|
9199c23720 | ||
|
|
94cabf29b0 | ||
|
|
e094c8e214 | ||
|
|
7048a316aa | ||
|
|
aea6df8197 | ||
|
|
f3a51e8b1d | ||
|
|
9b1e002287 | ||
|
|
02ee4ffd4d | ||
|
|
79e1c6a6fc | ||
|
|
9e02b3d48a | ||
|
|
622000797a | ||
|
|
4021aa11b5 | ||
|
|
3214b1c315 | ||
|
|
92199c964c | ||
|
|
72a0b49330 | ||
|
|
5832242b44 | ||
|
|
a1e496ced6 | ||
|
|
28f054bb00 | ||
|
|
811f4a9380 | ||
|
|
c8f2febaa1 | ||
|
|
36bbdd7d4b | ||
|
|
b14d96618c | ||
|
|
34634ec357 | ||
|
|
2b851e69d2 | ||
|
|
e7f46a0aab | ||
|
|
7205c79c5a | ||
|
|
9436ae3b07 | ||
|
|
5ba347bd2c | ||
|
|
25446a7933 | ||
|
|
ebc1caa5dc | ||
|
|
27f9a1eda2 | ||
|
|
7c86dcc4fa | ||
|
|
c1d871a45a | ||
|
|
54796f69db | ||
|
|
365d2ff0bf | ||
|
|
8db6a71f83 | ||
|
|
edeb56d208 | ||
|
|
24938872a6 | ||
|
|
ba505dd357 | ||
|
|
dc2c712a29 | ||
|
|
023c65968f | ||
|
|
36edba9bfb | ||
|
|
a2f716b6cc | ||
|
|
f0b09a1382 | ||
|
|
6a78755b66 | ||
|
|
e79cd24807 | ||
|
|
e480b9881e | ||
|
|
9e16329b2f | ||
|
|
70959d5dab | ||
|
|
4856a4cf5a | ||
|
|
8622dee4b5 | ||
|
|
8d709f3483 | ||
|
|
91533531f5 | ||
|
|
3283f0dae4 | ||
|
|
8da9502df6 | ||
|
|
d4525bd2d0 | ||
|
|
cc67eb4ff3 | ||
|
|
a2af2e5a1b | ||
|
|
5c92022cc6 | ||
|
|
43b24164ef | ||
|
|
6460475e3b | ||
|
|
6d56149b9f | ||
|
|
4ea27d6f6a | ||
|
|
f3c7302772 | ||
|
|
a26c6628fd | ||
|
|
8fdd613f25 | ||
|
|
57b00bafc9 | ||
|
|
ac3043ff74 | ||
|
|
d3608be313 | ||
|
|
fdbb819195 | ||
|
|
fbefc940ef | ||
|
|
e566d49e3a | ||
|
|
f8a2a3784b | ||
|
|
20aa707979 | ||
|
|
ddbbc9a86d | ||
|
|
91cbb9063d | ||
|
|
55afae8641 | ||
|
|
0691e115b1 | ||
|
|
32266aaea2 | ||
|
|
a11ac9648c | ||
|
|
90e1818068 | ||
|
|
8f6d5217d1 | ||
|
|
6a5d236245 | ||
|
|
4d68f5b1fc | ||
|
|
6f6333831e | ||
|
|
3e7bfe1200 | ||
|
|
02ffe05750 | ||
|
|
b9e79250b3 | ||
|
|
388d6ee16e | ||
|
|
e8225d7d6b | ||
|
|
911bab4f6a | ||
|
|
1428aa2c22 | ||
|
|
00a0816ab1 | ||
|
|
be68a6a1ee | ||
|
|
468de76e9a | ||
|
|
0af9e2b693 | ||
|
|
7257a2a97f | ||
|
|
c28c25ed2e | ||
|
|
01367faa39 | ||
|
|
a52413ce0a | ||
|
|
b19de3fa12 | ||
|
|
2f1d24fccf | ||
|
|
b5db69fe05 | ||
|
|
babc9e9815 | ||
|
|
d95037a175 | ||
|
|
e3488c6cbc | ||
|
|
48e32b325e | ||
|
|
856c2db144 | ||
|
|
927d9da270 | ||
|
|
c0c3dc02cf | ||
|
|
3eebe52a06 | ||
|
|
f7a7eb8f3e | ||
|
|
4c3bc04efa | ||
|
|
3c9058c168 | ||
|
|
d66bae212b | ||
|
|
7f54c181bb | ||
|
|
3de7fc5c71 | ||
|
|
09e3742a82 | ||
|
|
cbba6bd3db | ||
|
|
1b5dc9f91d | ||
|
|
05709bdfae | ||
|
|
ed73317622 | ||
|
|
70b831e684 | ||
|
|
6e8e64f695 | ||
|
|
138757629b | ||
|
|
9e71462cee | ||
|
|
884e58d58d | ||
|
|
1bb529e23e | ||
|
|
66bf1987bf | ||
|
|
202083f38c | ||
|
|
ccb08fc28b | ||
|
|
7a3e16e774 | ||
|
|
bbf8e459a0 | ||
|
|
02f13d5681 | ||
|
|
2472baa934 | ||
|
|
1b194bc6de | ||
|
|
cc5b916237 | ||
|
|
75977a4d02 | ||
|
|
4b136abff8 | ||
|
|
95dc65e7b3 | ||
|
|
a0cbef1c46 | ||
|
|
4e8de26fec | ||
|
|
0d8f80ce90 | ||
|
|
62beea23f7 | ||
|
|
943243321a | ||
|
|
6bfe9cc733 | ||
|
|
d056be710b | ||
|
|
de621c0cb7 | ||
|
|
14ef5311f0 | ||
|
|
5872b5711d | ||
|
|
8bab50dc29 | ||
|
|
d6fa4da712 | ||
|
|
2e153b68cd | ||
|
|
3814747e94 | ||
|
|
ec2abf9b69 | ||
|
|
06854738b6 | ||
|
|
95e1173423 | ||
|
|
8937de5f99 | ||
|
|
8288e327ee | ||
|
|
dfe3939665 | ||
|
|
46127b432d | ||
|
|
0d3f31f60e | ||
|
|
39cdc546dd | ||
|
|
5fadd58cf6 | ||
|
|
ac6d937372 | ||
|
|
b3bb18d674 | ||
|
|
ac4c7adec6 | ||
|
|
5d7da8479f | ||
|
|
4ee73f54a6 | ||
|
|
23871fb0bf | ||
|
|
1a6f2f07fd | ||
|
|
27c9446520 | ||
|
|
bcbf73225e | ||
|
|
255a0cf635 | ||
|
|
007530f882 | ||
|
|
f7ef80aaad | ||
|
|
ffa327d6d1 | ||
|
|
eece61a611 | ||
|
|
0c016279d5 | ||
|
|
2d36dbcfa9 | ||
|
|
8cfe4064b5 | ||
|
|
b4bf8dc2f0 | ||
|
|
e1c3267e34 | ||
|
|
c33cc4322c | ||
|
|
6a88d5402d | ||
|
|
dac21d874b | ||
|
|
87aeeec3e8 | ||
|
|
d8eaa511b0 | ||
|
|
a2340e6c95 | ||
|
|
c6ad3692ad | ||
|
|
43e104a83f | ||
|
|
c87c7d1e29 | ||
|
|
8b7a828c65 | ||
|
|
d4fc0ed874 | ||
|
|
a02cf92fd1 | ||
|
|
c734416f86 | ||
|
|
b285207aa7 | ||
|
|
c080443fef | ||
|
|
e688121de8 | ||
|
|
24915fd4bc | ||
|
|
637043a40e | ||
|
|
6d019a3c37 | ||
|
|
510f78a96b | ||
|
|
9dec3c8f80 | ||
|
|
aaef0bac00 | ||
|
|
fc720a5a78 | ||
|
|
383bf9689e | ||
|
|
9fbd45a22f | ||
|
|
eb08579452 | ||
|
|
4f649a0573 | ||
|
|
f8811411fd | ||
|
|
b6845951a5 | ||
|
|
d2b92d3264 | ||
|
|
84c82e988d | ||
|
|
317fef95f9 | ||
|
|
110c3896e7 | ||
|
|
57801660ab | ||
|
|
6b4ccc17c2 | ||
|
|
10cf6c9781 | ||
|
|
836d56876a | ||
|
|
d59dc7616d | ||
|
|
ffebc20f6d | ||
|
|
f04ec714c2 | ||
|
|
5446ce0018 | ||
|
|
21546e6922 | ||
|
|
7be8a8a37b | ||
|
|
6c38702212 | ||
|
|
ef1ef0598b | ||
|
|
9d91d8fc91 | ||
|
|
f4f1f2f976 | ||
|
|
a678cbe62e | ||
|
|
76f2c70be3 | ||
|
|
74f122294d | ||
|
|
d5171c155c | ||
|
|
2cea923ff7 | ||
|
|
c86f1f1d1b | ||
|
|
1030be91ae | ||
|
|
8bd2eee8e0 | ||
|
|
406822a16c | ||
|
|
c8467f37a9 | ||
|
|
6ef6f3a771 | ||
|
|
b58107c85a | ||
|
|
87f1ed5d87 | ||
|
|
11ce30820b | ||
|
|
5123a61be9 | ||
|
|
5c4f5b83fc | ||
|
|
ccdddf7996 | ||
|
|
9be1398b92 | ||
|
|
8830607021 | ||
|
|
a646841c07 | ||
|
|
7568658c19 | ||
|
|
af37717108 | ||
|
|
7720d403c0 | ||
|
|
fe196e0b7a | ||
|
|
f83d6d69b2 | ||
|
|
f3be9483f4 | ||
|
|
057698f7fb | ||
|
|
a645a95bd6 | ||
|
|
934646ccf7 | ||
|
|
8ea02eaa8e | ||
|
|
2e2b1ba87e | ||
|
|
9fff48c3e3 | ||
|
|
438b2e11bd | ||
|
|
e5070c0bcd | ||
|
|
c5b9f7f751 | ||
|
|
f9b3409ee3 | ||
|
|
25a9017a72 | ||
|
|
3ec8a4dc80 | ||
|
|
4156e78e11 | ||
|
|
b209d4ace0 | ||
|
|
b11bdc46be | ||
|
|
ed4492ddd5 | ||
|
|
776391917f | ||
|
|
f3625e4f3f | ||
|
|
70f8911ca7 | ||
|
|
f582f9e8ab | ||
|
|
73358571ee | ||
|
|
14c20c1843 | ||
|
|
c12eb2cc7f | ||
|
|
291c41978e | ||
|
|
d4b97b69bf | ||
|
|
035a2b5ed5 | ||
|
|
0e0095d350 | ||
|
|
a42e3e8dfb | ||
|
|
f13a255918 | ||
|
|
513707a8c7 | ||
|
|
f40661e7b7 | ||
|
|
a1432e6b0a | ||
|
|
bff18cb5dd | ||
|
|
e1063ce3c1 | ||
|
|
46a521191f | ||
|
|
8afc0aef8d | ||
|
|
114c14febf | ||
|
|
75bcf86a31 | ||
|
|
a1ee679042 | ||
|
|
a8e88e74cc | ||
|
|
c9d2934bb4 | ||
|
|
6c21b6ec09 | ||
|
|
e83f14210d | ||
|
|
6495b62866 | ||
|
|
86e47177dc | ||
|
|
146fd2eca3 | ||
|
|
67b01329a0 | ||
|
|
f2be447270 | ||
|
|
1901fbf19b | ||
|
|
3a51a3bc42 | ||
|
|
6f24fa2055 | ||
|
|
977c642934 | ||
|
|
c32d8ea29e | ||
|
|
8aa7559462 | ||
|
|
6fd10e8871 | ||
|
|
0a824d9490 | ||
|
|
e4c04b6dbe | ||
|
|
98dc968920 | ||
|
|
f63f487787 | ||
|
|
88fed0232c | ||
|
|
af1a9c5eda | ||
|
|
7440f971ab | ||
|
|
12cf8d9f69 | ||
|
|
e18f8e9413 | ||
|
|
07b7fe83c4 | ||
|
|
a2ba1f09e4 | ||
|
|
79527441ec | ||
|
|
df1e545c0e | ||
|
|
dc142867b8 | ||
|
|
da539bc286 | ||
|
|
fe736c5388 | ||
|
|
607b542222 | ||
|
|
8b9ebf625a | ||
|
|
7b87fac8e7 | ||
|
|
7b1caf1db3 | ||
|
|
9254e494f9 | ||
|
|
68985455f1 | ||
|
|
4cf37c5e70 | ||
|
|
3d331e4c5d | ||
|
|
4ecb61e247 | ||
|
|
442a9f16b4 | ||
|
|
dcc5616126 | ||
|
|
080a3e2396 | ||
|
|
ac8bc77688 | ||
|
|
1cbdcd391c | ||
|
|
0788be35eb | ||
|
|
ab57b92932 | ||
|
|
6a7faf9f22 | ||
|
|
ac14d50c18 | ||
|
|
51ad94677c | ||
|
|
bd716d1b0c | ||
|
|
06f6b76521 | ||
|
|
a0c8b86eab | ||
|
|
ff39a91147 | ||
|
|
372b1688d7 | ||
|
|
1b81d8f542 | ||
|
|
7e355080ce | ||
|
|
2afa4ae00a | ||
|
|
6342eb5523 | ||
|
|
54a0ccbaca | ||
|
|
b28cf0faa8 | ||
|
|
3251d392b5 | ||
|
|
119010f7f2 | ||
|
|
eedb294754 | ||
|
|
4250b67fe8 | ||
|
|
465c889f7f | ||
|
|
834c18a346 | ||
|
|
36941d6d75 | ||
|
|
558165521b | ||
|
|
0890adde67 | ||
|
|
28d92a2f31 | ||
|
|
cb374677a9 | ||
|
|
73256fe438 | ||
|
|
2c1419c687 | ||
|
|
465a285324 | ||
|
|
95ee86b600 | ||
|
|
28f66f0079 | ||
|
|
d655d6b047 | ||
|
|
99d49e3ceb | ||
|
|
20ad848c5d | ||
|
|
1a8875b417 | ||
|
|
c7c4786f3f | ||
|
|
a971bcc3fe | ||
|
|
0fd440cdb4 | ||
|
|
c496f06ca3 | ||
|
|
f7acdb13db | ||
|
|
1cfa183c2b | ||
|
|
3536bef36e | ||
|
|
babecd8363 | ||
|
|
393876e52a | ||
|
|
2c4e384f07 | ||
|
|
ba5a6c851c | ||
|
|
1a3a6ef907 | ||
|
|
7030429958 | ||
|
|
30e968df6d | ||
|
|
f2b40dbe9a | ||
|
|
a11dc6689a | ||
|
|
0a4d8dc777 | ||
|
|
3d1cb011b6 | ||
|
|
a7f8ce5e3d | ||
|
|
7c1daade15 | ||
|
|
f3cb412508 | ||
|
|
edc51b3119 | ||
|
|
cb5d073bc9 | ||
|
|
864c651c03 | ||
|
|
4e25bc2087 | ||
|
|
74df30456b | ||
|
|
df7b81b44d | ||
|
|
d513230d43 | ||
|
|
e8554cd1cb | ||
|
|
59b67f1cfa | ||
|
|
adeec6e369 | ||
|
|
9785b3f57f | ||
|
|
9c62391a5c | ||
|
|
59b97f26c0 | ||
|
|
19e28ce7b6 | ||
|
|
777038fe44 | ||
|
|
e867df5ef5 | ||
|
|
2ac530eb28 | ||
|
|
b8409d6600 | ||
|
|
1ac025bbc9 | ||
|
|
0c625185cb | ||
|
|
68463c9e87 | ||
|
|
d79f1b106c | ||
|
|
322d96bfe5 | ||
|
|
46b3b76d6d | ||
|
|
5cb8ce8174 | ||
|
|
fcef2ff6b2 | ||
|
|
1ffa793322 | ||
|
|
e58921aa8f | ||
|
|
c94020f7dc | ||
|
|
006af394ff | ||
|
|
289af65071 | ||
|
|
e06168f489 | ||
|
|
09d7fa2737 | ||
|
|
094ae82df5 | ||
|
|
092e2c8f2d | ||
|
|
04c408e986 | ||
|
|
cdb6d651e9 | ||
|
|
207a62a3c2 | ||
|
|
27afe7bc38 | ||
|
|
ffe6e6fe59 | ||
|
|
7fd82c0d3a | ||
|
|
f32a79189b | ||
|
|
be8fba9b6a | ||
|
|
98de06ff38 | ||
|
|
20f28eb9d6 | ||
|
|
ec7c3f45ba | ||
|
|
7067e8206c | ||
|
|
e2498af530 | ||
|
|
9f5b5708ff | ||
|
|
9fdd1a10c6 | ||
|
|
a194982117 | ||
|
|
820312a2b1 | ||
|
|
ec23ab6bc2 | ||
|
|
20af29294e | ||
|
|
b4ad3a3b4c | ||
|
|
8537533beb | ||
|
|
5c8bb029b5 | ||
|
|
13a1a7f826 | ||
|
|
83c87f822a | ||
|
|
54aec2b1ba | ||
|
|
b3a70b8284 | ||
|
|
351fc152e0 | ||
|
|
975bb8722f | ||
|
|
f73953a049 | ||
|
|
dff47c73b7 | ||
|
|
65b9dcfcca | ||
|
|
0771d57860 | ||
|
|
31fc29599f | ||
|
|
a0f9cb27f9 | ||
|
|
4faf7ea41e | ||
|
|
c449714c0a | ||
|
|
37fe04999c | ||
|
|
eda940106a | ||
|
|
28df7c2a96 | ||
|
|
2d294cca59 | ||
|
|
95ce1ba6ce | ||
|
|
4225a0bd75 | ||
|
|
0811000bb0 | ||
|
|
37c52ccaf4 | ||
|
|
cbe62f23ba | ||
|
|
53d871d0b1 | ||
|
|
b2ccdaaa2f | ||
|
|
b06e795a1e | ||
|
|
e640ff72f1 | ||
|
|
9a563a6aef | ||
|
|
30ed33fae0 | ||
|
|
645c24dc5f | ||
|
|
2f3ddd4884 | ||
|
|
26cf680468 | ||
|
|
4f0c11ee93 | ||
|
|
562d6bca08 | ||
|
|
21ee9a1fab | ||
|
|
df2a494a7c | ||
|
|
c5e0f527bc | ||
|
|
7afcca0c51 | ||
|
|
67ab49baa9 | ||
|
|
e5eca54951 | ||
|
|
c38a10e143 | ||
|
|
1f9d605988 | ||
|
|
fe0e199859 | ||
|
|
8aee209c53 | ||
|
|
28f8dc41b0 | ||
|
|
5df9fddaf2 | ||
|
|
41e00a0df7 | ||
|
|
488940502c | ||
|
|
5fe7ff24c2 | ||
|
|
ad5bfe3089 | ||
|
|
af263fe881 | ||
|
|
45f39e291e | ||
|
|
986a05e18d | ||
|
|
293e4dc77b | ||
|
|
5c4bd4f7c1 | ||
|
|
c63755c316 | ||
|
|
f299d2ca1a | ||
|
|
e7637885a6 | ||
|
|
463b957e54 | ||
|
|
f392913d00 | ||
|
|
bced9fb978 | ||
|
|
5bdd880142 | ||
|
|
9f348cf8a1 | ||
|
|
cad8553c01 | ||
|
|
1a28f0e5b3 | ||
|
|
48f371a46c | ||
|
|
043b28c725 | ||
|
|
ec2c82e800 | ||
|
|
01bc0c94ab | ||
|
|
9d1104d812 | ||
|
|
8b763175ff | ||
|
|
2ee81a5dbb | ||
|
|
185cdcd813 | ||
|
|
0dea3b71da | ||
|
|
a1076abcbf | ||
|
|
a7e29c38bc | ||
|
|
2460e0f51e | ||
|
|
0e1f0ade31 | ||
|
|
04dff34de4 | ||
|
|
66947ee5a2 | ||
|
|
9d0e1f8e68 | ||
|
|
63bf583b3c | ||
|
|
09fe346d18 | ||
|
|
59f20c1034 | ||
|
|
0ff85d00a4 | ||
|
|
fafece1af8 | ||
|
|
5bca3a5be2 | ||
|
|
fd175ad80b | ||
|
|
fa13bbc48a | ||
|
|
add2c4bf07 | ||
|
|
f33e687723 | ||
|
|
d3a1c36842 | ||
|
|
6db5c3801e | ||
|
|
189f85b24c | ||
|
|
7b264b0c23 | ||
|
|
8fe21ec707 | ||
|
|
fa842d6534 | ||
|
|
93e935bcaa | ||
|
|
0a519c93ef | ||
|
|
4b3d8eb573 | ||
|
|
05fa11b296 | ||
|
|
1794f3d46e | ||
|
|
59e1e84a92 | ||
|
|
b8bc62431a | ||
|
|
1720bddb4f | ||
|
|
2cedb3e883 | ||
|
|
83870aeb8d | ||
|
|
7af8857b68 | ||
|
|
c90752a8be | ||
|
|
0a9e1d64fe | ||
|
|
6588fcbfca | ||
|
|
3dc684634e | ||
|
|
d2f89b55b7 | ||
|
|
3d082ed6db | ||
|
|
c4229a1bba | ||
|
|
1b16118e17 | ||
|
|
7ad9fff7e5 | ||
|
|
293dda7169 | ||
|
|
ed9161a04f | ||
|
|
c612bb165e | ||
|
|
34c705988e | ||
|
|
7d9c4bebc0 | ||
|
|
b11c806d1c | ||
|
|
2a7e392bb3 | ||
|
|
0dca224ec3 | ||
|
|
8ef1fe2047 | ||
|
|
f599e1bd34 | ||
|
|
4deea604bf | ||
|
|
f6d31f5216 | ||
|
|
f6ac045933 | ||
|
|
1df87807fd | ||
|
|
0076422350 | ||
|
|
f1441a598f | ||
|
|
fa236c5a84 | ||
|
|
d3de110070 | ||
|
|
31886aef3d | ||
|
|
3300546eab | ||
|
|
0f8ffc7df9 | ||
|
|
192db0f0b1 | ||
|
|
f443fad56d | ||
|
|
77874d6055 | ||
|
|
7ca49290b6 | ||
|
|
e4e9dfb785 | ||
|
|
731d189fa9 | ||
|
|
4be4645142 | ||
|
|
4e55b67a44 | ||
|
|
8f5e822565 | ||
|
|
cad90c7ac1 | ||
|
|
a48510573e | ||
|
|
8afa7ef837 | ||
|
|
1d7b5cb83c | ||
|
|
18e55d14c6 | ||
|
|
8122191368 | ||
|
|
6bf46c7bf5 | ||
|
|
9fa3f1dc57 | ||
|
|
eabb8762ee | ||
|
|
fd53f86c84 | ||
|
|
680925f872 | ||
|
|
d2ad184377 | ||
|
|
944effca54 | ||
|
|
6530344a8f | ||
|
|
9a0308ab32 | ||
|
|
0bf3ae9559 | ||
|
|
6c98b56935 | ||
|
|
dc0b08efb0 | ||
|
|
057fb2120b | ||
|
|
4cb83f0f4a | ||
|
|
72f8fce107 | ||
|
|
56a9ea3753 | ||
|
|
285841bcce | ||
|
|
65f8fc527f | ||
|
|
86dae56bd0 | ||
|
|
07e9322157 | ||
|
|
e40c7d6efa | ||
|
|
17a244571b | ||
|
|
ad8852759d | ||
|
|
4de9d35458 | ||
|
|
5d30080555 | ||
|
|
38341802c2 | ||
|
|
1896c47fee | ||
|
|
231569f89b | ||
|
|
0c54ff20eb | ||
|
|
76445bfd98 | ||
|
|
09a70d3e90 | ||
|
|
ac5948b3f3 | ||
|
|
ea44c39377 | ||
|
|
e0009ec466 | ||
|
|
7a61bafe59 | ||
|
|
b89d862aa5 | ||
|
|
a50120a212 | ||
|
|
d3418bafc0 | ||
|
|
0d41d933e9 | ||
|
|
33bff00890 | ||
|
|
cca3bc756b | ||
|
|
bde93844de | ||
|
|
d1af6046c7 | ||
|
|
3b18931050 | ||
|
|
ebcb4ab617 | ||
|
|
8286f9608f | ||
|
|
eb275be99d | ||
|
|
7ae744fce6 |
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -1,46 +0,0 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
It would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Logs**
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
|
||||
**Version**
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
|
||||
**Used command-line flags**
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
name: Bug report
|
||||
description: Create a report to help us improve
|
||||
labels: [bug]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
|
||||
- type: textarea
|
||||
id: describe-the-bug
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: |
|
||||
A clear and concise description of what the bug is.
|
||||
placeholder: |
|
||||
When I do `A` VictoriaMetrics does `B`. I expect it to do `C`.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: to-reproduce
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior.
|
||||
If reproducing an issue requires some specific configuration file, please paste it here.
|
||||
placeholder: |
|
||||
Steps to reproduce the behavior.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
description: |
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Logs
|
||||
description: |
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: |
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: flags
|
||||
attributes:
|
||||
label: Used command-line flags
|
||||
description: |
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
placeholder: |
|
||||
Additional information that doesn't fit elsewhere
|
||||
validations:
|
||||
required: false
|
||||
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Ask on Slack
|
||||
url: https://slack.victoriametrics.com/
|
||||
about: You can ask for help here!
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -1,20 +0,0 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
name: Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [enhancement]
|
||||
body:
|
||||
- type: textarea
|
||||
id: describe-the-problem
|
||||
attributes:
|
||||
label: Is your feature request related to a problem? Please describe
|
||||
description: |
|
||||
A clear and concise description of what the problem is.
|
||||
placeholder: |
|
||||
Ex. I'm always frustrated when [...]
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: describe-the-solution
|
||||
attributes:
|
||||
label: Describe the solution you'd like
|
||||
description: |
|
||||
A clear and concise description of what you want to happen.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: alternative-solutions
|
||||
attributes:
|
||||
label: Describe alternatives you've considered
|
||||
description: |
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
placeholder: |
|
||||
I have tried to do `A`, but that doesn't solve a problem completely.
|
||||
I have tried to do `A` and `B`, but implementing this would be better.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: feature-additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Additional information which you consider helpful for implementing this feature.
|
||||
placeholder: |
|
||||
Add any other context or screenshots about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
32
.github/ISSUE_TEMPLATE/question.yml
vendored
Normal file
32
.github/ISSUE_TEMPLATE/question.yml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
name: Question
|
||||
description: Ask a question regarding VictoriaMetrics or its components
|
||||
labels: [question]
|
||||
body:
|
||||
- type: textarea
|
||||
id: describe-the-component
|
||||
attributes:
|
||||
label: Is your question request related to a specific component?
|
||||
placeholder: |
|
||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: describe-the-question
|
||||
attributes:
|
||||
label: Describe the question in detail
|
||||
description: |
|
||||
A clear and concise description of the issue and the question.
|
||||
validations:
|
||||
required: true
|
||||
- type: checkboxes
|
||||
id: troubleshooting
|
||||
attributes:
|
||||
label: Troubleshooting docs
|
||||
description: I am familiar with the following troubleshooting docs
|
||||
options:
|
||||
- label: General - https://docs.victoriametrics.com/Troubleshooting.html
|
||||
required: false
|
||||
- label: vmagent - https://docs.victoriametrics.com/vmagent.html#troubleshooting
|
||||
required: false
|
||||
- label: vmalert - https://docs.victoriametrics.com/vmalert.html#troubleshooting
|
||||
required: false
|
||||
2
.github/workflows/check-licenses.yml
vendored
2
.github/workflows/check-licenses.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.4
|
||||
go-version: 1.20.4
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: "CodeQL - JS"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["javascript"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
with:
|
||||
category: "javascript"
|
||||
84
.github/workflows/codeql-analysis.yml
vendored
84
.github/workflows/codeql-analysis.yml
vendored
@@ -13,12 +13,26 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master, cluster ]
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ master, cluster ]
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: '30 18 * * 2'
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
@@ -32,45 +46,47 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'go', 'javascript' ]
|
||||
language: ["go"]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.19
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.20.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
|
||||
95
.github/workflows/main.yml
vendored
95
.github/workflows/main.yml
vendored
@@ -1,45 +1,96 @@
|
||||
name: main
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
lint:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.4
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.20.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golint
|
||||
make install-errcheck
|
||||
make install-golangci-lint
|
||||
- name: Build
|
||||
run: |
|
||||
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
make test-full
|
||||
make test-pure
|
||||
make test-full-386
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
matrix:
|
||||
scenario: ["test-full", "test-pure", "test-full-386"]
|
||||
name: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.20.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.20.4
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
48
.github/workflows/nightly-build.yml
vendored
Normal file
48
.github/workflows/nightly-build.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
name: nightly-build
|
||||
on:
|
||||
schedule:
|
||||
# Daily at 2:48am
|
||||
- cron: '48 2 * * *'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.20.4
|
||||
id: go
|
||||
|
||||
- name: Setup docker scan
|
||||
run: |
|
||||
mkdir -p ~/.docker/cli-plugins && \
|
||||
curl https://github.com/docker/scan-cli-plugin/releases/latest/download/docker-scan_linux_amd64 -L -s -S -o ~/.docker/cli-plugins/docker-scan &&\
|
||||
chmod +x ~/.docker/cli-plugins/docker-scan
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: build & publish
|
||||
run: |
|
||||
docker scan --severity=medium --login --token "$SNYK_TOKEN" --accept-license
|
||||
LATEST_TAG=nightly PKG_TAG=nightly make publish
|
||||
env:
|
||||
SNYK_TOKEN: ${{ secrets.SNYK_AUTH_TOKEN }}
|
||||
19
.golangci.yml
Normal file
19
.golangci.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
run:
|
||||
timeout: 2m
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- revive
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- linters:
|
||||
- staticcheck
|
||||
text: "SA(4003|1019|5011):"
|
||||
include:
|
||||
- EXC0012
|
||||
- EXC0014
|
||||
|
||||
linters-settings:
|
||||
errcheck:
|
||||
exclude: ./errcheck_excludes.txt
|
||||
@@ -3,3 +3,4 @@ allowlist:
|
||||
- MIT
|
||||
- BSD-3-Clause
|
||||
- BSD-2-Clause
|
||||
- ISC
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2022 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
67
Makefile
67
Makefile
@@ -3,6 +3,7 @@ PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
LATEST_TAG ?= latest
|
||||
|
||||
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
|
||||
ifeq ($(PKG_TAG),)
|
||||
@@ -140,9 +141,12 @@ vmutils-windows-amd64: \
|
||||
vmagent-windows-amd64 \
|
||||
vmalert-windows-amd64 \
|
||||
vmauth-windows-amd64 \
|
||||
vmbackup-windows-amd64 \
|
||||
vmrestore-windows-amd64 \
|
||||
vmctl-windows-amd64
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-amd64 \
|
||||
victoria-metrics-linux-arm64 \
|
||||
victoria-metrics-linux-arm \
|
||||
@@ -154,6 +158,7 @@ victoria-metrics-crossbuild: \
|
||||
victoria-metrics-openbsd-amd64
|
||||
|
||||
vmutils-crossbuild: \
|
||||
vmutils-linux-386 \
|
||||
vmutils-linux-amd64 \
|
||||
vmutils-linux-arm64 \
|
||||
vmutils-linux-arm \
|
||||
@@ -166,23 +171,29 @@ vmutils-crossbuild: \
|
||||
vmutils-windows-amd64
|
||||
|
||||
publish-release:
|
||||
git checkout $(TAG) && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-386 \
|
||||
release-victoria-metrics-linux-amd64 \
|
||||
release-victoria-metrics-linux-arm \
|
||||
release-victoria-metrics-linux-arm64 \
|
||||
release-victoria-metrics-darwin-amd64 \
|
||||
release-victoria-metrics-darwin-arm64 \
|
||||
release-victoria-metrics-freebsd-amd64 \
|
||||
release-victoria-metrics-openbsd-amd64
|
||||
release-victoria-metrics-openbsd-amd64 \
|
||||
release-victoria-metrics-windows-amd64
|
||||
|
||||
# adds i386 arch
|
||||
release-victoria-metrics-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
@@ -205,6 +216,9 @@ release-victoria-metrics-freebsd-amd64:
|
||||
release-victoria-metrics-openbsd-amd64:
|
||||
GOOS=openbsd GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-windows-amd64:
|
||||
GOARCH=amd64 $(MAKE) release-victoria-metrics-windows-goarch
|
||||
|
||||
release-victoria-metrics-goos-goarch: victoria-metrics-$(GOOS)-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
@@ -214,7 +228,18 @@ release-victoria-metrics-goos-goarch: victoria-metrics-$(GOOS)-$(GOARCH)-prod
|
||||
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > victoria-metrics-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf victoria-metrics-$(GOOS)-$(GOARCH)-prod
|
||||
|
||||
release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum victoria-metrics-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe \
|
||||
> victoria-metrics-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
victoria-metrics-windows-$(GOARCH)-prod.exe
|
||||
|
||||
release-vmutils: \
|
||||
release-vmutils-linux-386 \
|
||||
release-vmutils-linux-amd64 \
|
||||
release-vmutils-linux-arm64 \
|
||||
release-vmutils-linux-arm \
|
||||
@@ -224,6 +249,9 @@ release-vmutils: \
|
||||
release-vmutils-openbsd-amd64 \
|
||||
release-vmutils-windows-amd64
|
||||
|
||||
release-vmutils-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
@@ -283,26 +311,33 @@ release-vmutils-windows-goarch: \
|
||||
vmagent-windows-$(GOARCH)-prod \
|
||||
vmalert-windows-$(GOARCH)-prod \
|
||||
vmauth-windows-$(GOARCH)-prod \
|
||||
vmbackup-windows-$(GOARCH)-prod \
|
||||
vmrestore-windows-$(GOARCH)-prod \
|
||||
vmctl-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe
|
||||
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
|
||||
@@ -314,21 +349,7 @@ vet:
|
||||
go vet ./lib/...
|
||||
go vet ./app/...
|
||||
|
||||
lint: install-golint
|
||||
golint lib/...
|
||||
golint app/...
|
||||
|
||||
install-golint:
|
||||
which golint || go install golang.org/x/lint/golint@latest
|
||||
|
||||
errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./lib/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/...
|
||||
|
||||
install-errcheck:
|
||||
which errcheck || go install github.com/kisielk/errcheck@latest
|
||||
|
||||
check-all: fmt vet lint errcheck golangci-lint govulncheck
|
||||
check-all: fmt vet golangci-lint govulncheck
|
||||
|
||||
test:
|
||||
go test ./lib/... ./app/...
|
||||
@@ -379,10 +400,10 @@ install-qtc:
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
|
||||
golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.48.0
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.51.2
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
|
||||
428
README.md
428
README.md
@@ -2,6 +2,7 @@
|
||||
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
[](https://hub.docker.com/r/victoriametrics/victoria-metrics)
|
||||
[](https://snapcraft.io/victoriametrics)
|
||||
[](https://slack.victoriametrics.com/)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
@@ -39,20 +40,38 @@ VictoriaMetrics has the following prominent features:
|
||||
* It can be used as long-term storage for Prometheus. See [these docs](#prometheus-setup) for details.
|
||||
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
|
||||
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
|
||||
* It features easy setup and operation:
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
|
||||
VictoriaMetrics allows reducing infrastructure costs by more than 10x comparing to Graphite - see [this case study](https://docs.victoriametrics.com/CaseStudies.html#grammarly).
|
||||
* It is easy to setup and operate:
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
|
||||
without external dependencies.
|
||||
* All the configuration is done via explicit command-line flags with reasonable defaults.
|
||||
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) to S3 or GCS can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools. See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-based query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
|
||||
* It provides high performance and good vertical and horizontal scalability for both [data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f) when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It provides high performance and good vertical and horizontal scalability for both
|
||||
[data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
|
||||
and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
|
||||
It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
|
||||
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
|
||||
when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
|
||||
* It provides high data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) may be crammed into limited storage comparing to TimescaleDB and [up to 7x less storage space is required compared to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae), [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It provides high data compression, so up to 70x more data points may be stored into limited storage comparing to TimescaleDB
|
||||
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
|
||||
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex
|
||||
according to [this benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
|
||||
See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
|
||||
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
|
||||
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
|
||||
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
|
||||
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to
|
||||
[the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
|
||||
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* [Prometheus remote write API](#prometheus-setup).
|
||||
@@ -65,11 +84,15 @@ VictoriaMetrics has the following prominent features:
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data
|
||||
and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/) and [Google Filestore](https://cloud.google.com/filestore).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/)
|
||||
and [Google Filestore](https://cloud.google.com/filestore).
|
||||
|
||||
See also [various Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
|
||||
|
||||
@@ -84,7 +107,7 @@ Case studies:
|
||||
* [Brandwatch](https://docs.victoriametrics.com/CaseStudies.html#brandwatch)
|
||||
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
|
||||
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
|
||||
* [Dreamteam](https://docs.victoriametrics.com/CaseStudies.html#dreamteam)
|
||||
* [Dig Security](https://docs.victoriametrics.com/CaseStudies.html#dig-security)
|
||||
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
|
||||
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
|
||||
* [Grammarly](https://docs.victoriametrics.com/CaseStudies.html#grammarly)
|
||||
@@ -105,11 +128,22 @@ See also [articles and slides about VictoriaMetrics from our users](https://docs
|
||||
|
||||
## Operation
|
||||
|
||||
### How to start VictoriaMetrics
|
||||
### Install
|
||||
|
||||
Just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
|
||||
To quickly try VictoriaMetrics, just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
|
||||
See also [QuickStart guide](https://docs.victoriametrics.com/Quick-Start.html) for additional information.
|
||||
|
||||
VictoriaMetrics can also be installed via these installation methods:
|
||||
|
||||
* [Helm charts for single-node and cluster versions of VictoriaMetrics](https://github.com/VictoriaMetrics/helm-charts).
|
||||
* [Kubernetes operator for VictoriaMetrics](https://github.com/VictoriaMetrics/operator).
|
||||
* [Ansible role for installing cluster VictoriaMetrics (by VictoriaMetrics)](https://github.com/VictoriaMetrics/ansible-playbooks).
|
||||
* [Ansible role for installing cluster VictoriaMetrics (by community)](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
|
||||
* [Ansible role for installing single-node VictoriaMetrics (by community)](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Snap package for VictoriaMetrics](https://snapcraft.io/victoriametrics).
|
||||
|
||||
### How to start VictoriaMetrics
|
||||
|
||||
The following command-line flags are used the most:
|
||||
|
||||
* `-storageDataPath` - VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
|
||||
@@ -168,7 +202,7 @@ Changing scrape configuration is possible with text editor:
|
||||
vi $SNAP_DATA/var/snap/victoriametrics/current/etc/victoriametrics-scrape-config.yaml
|
||||
```
|
||||
|
||||
After changes were made, trigger config re-read with the command `curl 127.0.0.1:8248/-/reload`.
|
||||
After changes were made, trigger config re-read with the command `curl 127.0.0.1:8428/-/reload`.
|
||||
|
||||
## Prometheus setup
|
||||
|
||||
@@ -248,7 +282,11 @@ http://<victoriametrics-addr>:8428
|
||||
|
||||
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
|
||||
|
||||
Then build graphs and dashboards for the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
|
||||
Then build graphs and dashboards for the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
|
||||
or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
|
||||
|
||||
Alternatively, use VictoriaMetrics [datasource plugin](https://github.com/VictoriaMetrics/grafana-datasource) with support of extra features.
|
||||
See more in [description](https://github.com/VictoriaMetrics/grafana-datasource#victoriametrics-data-source-for-grafana).
|
||||
|
||||
## How to upgrade VictoriaMetrics
|
||||
|
||||
@@ -269,11 +307,21 @@ Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](
|
||||
## vmui
|
||||
|
||||
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`.
|
||||
The UI allows exploring query results via graphs and tables.
|
||||
It also provides the following features:
|
||||
- [cardinality explorer](#cardinality-explorer)
|
||||
- [query tracer](#query-tracing)
|
||||
- [top queries explorer](#top-queries)
|
||||
The UI allows exploring query results via graphs and tables. It also provides the following features:
|
||||
|
||||
- Explore:
|
||||
- [Metrics explorer](#metrics-explorer) - automatically builds graphs for selected metrics;
|
||||
- [Cardinality explorer](#cardinality-explorer) - stats about existing metrics in TSDB;
|
||||
- [Top queries](#top-queries) - shows most frequently executed queries;
|
||||
- Tools:
|
||||
- [Trace analyzer](#query-tracing) - playground for loading query traces in JSON format;
|
||||
- [WITH expressions playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/expand-with-exprs) - test how WITH expressions work;
|
||||
- [Metric relabel debugger](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/relabeling) - playground for [relabeling](#relabeling) configs.
|
||||
|
||||
VMUI automatically switches from graph view to heatmap view when the query returns [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) buckets
|
||||
(both [Prometheus histograms](https://prometheus.io/docs/concepts/metric_types/#histogram)
|
||||
and [VictoriaMetrics histograms](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) are supported).
|
||||
Try, for example, [this query](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/?g0.expr=sum%28rate%28vm_promscrape_scrape_duration_seconds_bucket%29%29+by+%28vmrange%29&g0.range_input=24h&g0.end_input=2023-04-10T17%3A46%3A12&g0.relative_time=last_24_hours&g0.step_input=31m).
|
||||
|
||||
Graphs in `vmui` support scrolling and zooming:
|
||||
|
||||
@@ -285,9 +333,12 @@ Query history can be navigated by holding `Ctrl` (or `Cmd` on MacOS) and pressin
|
||||
|
||||
Multi-line queries can be entered by pressing `Shift-Enter` in query input field.
|
||||
|
||||
When querying the [backfilled data](https://docs.victoriametrics.com/#backfilling) or during [query troubleshooting](https://docs.victoriametrics.com/Troubleshooting.html#unexpected-query-results), it may be useful disabling response cache by clicking `Disable cache` checkbox.
|
||||
When querying the [backfilled data](https://docs.victoriametrics.com/#backfilling)
|
||||
or during [query troubleshooting](https://docs.victoriametrics.com/Troubleshooting.html#unexpected-query-results),
|
||||
it may be useful disabling response cache by clicking `Disable cache` checkbox.
|
||||
|
||||
VMUI automatically adjusts the interval between datapoints on the graph depending on the horizontal resolution and on the selected time range. The step value can be customized by changing `Step value` input.
|
||||
VMUI automatically adjusts the interval between datapoints on the graph depending on the horizontal resolution and on the selected time range.
|
||||
The step value can be customized by changing `Step value` input.
|
||||
|
||||
VMUI allows investigating correlations between multiple queries on the same graph. Just click `Add Query` button,
|
||||
enter an additional query in the newly appeared input field and press `Enter`.
|
||||
@@ -306,9 +357,21 @@ See the [example VMUI at VictoriaMetrics playground](https://play.victoriametric
|
||||
* queries with the biggest average execution duration;
|
||||
* queries that took the most summary time for execution.
|
||||
|
||||
## Metrics explorer
|
||||
|
||||
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
|
||||
|
||||
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
|
||||
2. Click the `Explore metrics` tab.
|
||||
3. Select the `job` you want to explore.
|
||||
4. Optionally select the `instance` for the selected job to explore.
|
||||
5. Select metrics you want to explore and compare.
|
||||
|
||||
It is possible to change the selected time range for the graphs in the top right corner.
|
||||
|
||||
## Cardinality explorer
|
||||
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `Explore cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
|
||||
- To identify metric names with the highest number of series.
|
||||
- To identify labels with the highest number of series.
|
||||
@@ -319,8 +382,8 @@ VictoriaMetrics provides an ability to explore time series cardinality at `cardi
|
||||
may show lower than expected number of unique label values for labels with small number of unique values.
|
||||
This is because of [implementation limits](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/5a6e617b5e41c9170e7c562aecd15ee0c901d489/app/vmselect/netstorage/netstorage.go#L1039-L1045).
|
||||
|
||||
By default cardinality explorer analyzes time series for the current date. It provides the ability to select different day at the top right corner.
|
||||
By default all the time series for the selected date are analyzed. It is possible to narrow down the analysis to series
|
||||
By default, cardinality explorer analyzes time series for the current date. It provides the ability to select different day at the top right corner.
|
||||
By default, all the time series for the selected date are analyzed. It is possible to narrow down the analysis to series
|
||||
matching the specified [series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors).
|
||||
|
||||
Cardinality explorer is built on top of [/api/v1/status/tsdb](#tsdb-stats).
|
||||
@@ -363,7 +426,7 @@ DataDog agent allows configuring destinations for metrics sending via ENV variab
|
||||
or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.
|
||||
|
||||
<p align="center">
|
||||
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
</p>
|
||||
|
||||
To configure DataDog agent via ENV variable add the following prefix:
|
||||
@@ -397,7 +460,7 @@ DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guid
|
||||
sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.
|
||||
|
||||
<p align="center">
|
||||
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
</p>
|
||||
|
||||
Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
|
||||
@@ -517,7 +580,8 @@ The `/api/v1/export` endpoint should return the following response:
|
||||
```
|
||||
|
||||
Note that InfluxDB line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
|
||||
while VictoriaMetrics stores them with *milliseconds* precision.
|
||||
while VictoriaMetrics stores them with *milliseconds* precision. It is allowed to ingest timestamps with seconds,
|
||||
microseconds or nanoseconds precision - VictoriaMetrics will automatically convert them to milliseconds.
|
||||
|
||||
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
|
||||
For example, `/write?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
@@ -580,7 +644,6 @@ The `__graphite__` pseudo-label supports e.g. alternate regexp filters such as `
|
||||
|
||||
VictoriaMetrics also supports Graphite query language - see [these docs](#graphite-render-api-usage).
|
||||
|
||||
|
||||
## How to send data from OpenTSDB-compatible agents
|
||||
|
||||
VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
|
||||
@@ -695,21 +758,45 @@ All the Prometheus querying API handlers can be prepended with `/prometheus` pre
|
||||
|
||||
### Prometheus querying API enhancements
|
||||
|
||||
VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query arg, which can be used for enforcing additional label filters for queries. For example,
|
||||
`/api/v1/query_range?extra_label=user_id=123&extra_label=group_id=456&query=<query>` would automatically add `{user_id="123",group_id="456"}` label filters to the given `<query>`. This functionality can be used for limiting the scope of time series visible to the given tenant. It is expected that the `extra_label` query args are automatically set by auth proxy sitting in front of VictoriaMetrics. See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query arg, which can be used
|
||||
for enforcing additional label filters for queries. For example, `/api/v1/query_range?extra_label=user_id=123&extra_label=group_id=456&query=<query>`
|
||||
would automatically add `{user_id="123",group_id="456"}` label filters to the given `<query>`.
|
||||
This functionality can be used for limiting the scope of time series visible to the given tenant.
|
||||
It is expected that the `extra_label` query args are automatically set by auth proxy sitting in front of VictoriaMetrics.
|
||||
See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
|
||||
VictoriaMetrics accepts optional `extra_filters[]=series_selector` query arg, which can be used for enforcing arbitrary label filters for queries. For example,
|
||||
`/api/v1/query_range?extra_filters[]={env=~"prod|staging",user="xyz"}&query=<query>` would automatically add `{env=~"prod|staging",user="xyz"}` label filters to the given `<query>`. This functionality can be used for limiting the scope of time series visible to the given tenant. It is expected that the `extra_filters[]` query args are automatically set by auth proxy sitting in front of VictoriaMetrics. See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
VictoriaMetrics accepts optional `extra_filters[]=series_selector` query arg, which can be used for enforcing arbitrary label filters for queries.
|
||||
For example, `/api/v1/query_range?extra_filters[]={env=~"prod|staging",user="xyz"}&query=<query>` would automatically
|
||||
add `{env=~"prod|staging",user="xyz"}` label filters to the given `<query>`. This functionality can be used for limiting
|
||||
the scope of time series visible to the given tenant. It is expected that the `extra_filters[]` query args are automatically
|
||||
set by auth proxy sitting in front of VictoriaMetrics.
|
||||
See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
|
||||
VictoriaMetrics accepts relative times in `time`, `start` and `end` query args additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt).
|
||||
For example, the following query would return data for the last 30 minutes: `/api/v1/query_range?start=-30m&query=...`.
|
||||
VictoriaMetrics accepts multiple formats for `time`, `start` and `end` query args - see [these docs](#timestamp-formats).
|
||||
|
||||
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
VictoriaMetrics accepts `round_digits` query arg for [/api/v1/query](https://docs.victoriametrics.com/keyConcepts.html#instant-query)
|
||||
and [/api/v1/query_range](https://docs.victoriametrics.com/keyConcepts.html#range-query) handlers. It can be used for rounding response values
|
||||
to the given number of digits after the decimal point.
|
||||
For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
|
||||
VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used.
|
||||
VictoriaMetrics accepts `limit` query arg for [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels)
|
||||
and [`/api/v1/label/<labelName>/values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) handlers for limiting the number of returned entries.
|
||||
For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels.
|
||||
If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values,
|
||||
then limits specified in the command-line flags are used.
|
||||
|
||||
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, `/api/v1/labels` and `/api/v1/label/<labelName>/values` while the Prometheus API defaults to all time. Explicitly set `start` and `end` to select the desired time range.
|
||||
VictoriaMetrics accepts `limit` query arg for `/api/v1/series` handlers for limiting the number of returned entries. For example, the query to `/api/v1/series?limit=5` returns a sample of up to 5 series, while ignoring the rest. If the provided `limit` value exceeds the corresponding `-search.maxSeries` command-line flag values, then limits specified in the command-line flags are used.
|
||||
By default, VictoriaMetrics returns time series for the last day starting at 00:00 UTC
|
||||
from [/api/v1/series](https://docs.victoriametrics.com/url-examples.html#apiv1series),
|
||||
[/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and
|
||||
[`/api/v1/label/<labelName>/values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues),
|
||||
while the Prometheus API defaults to all time. Explicitly set `start` and `end` to select the desired time range.
|
||||
VictoriaMetrics rounds the specified `start..end` time range to day granularity because of performance optimization concerns.
|
||||
If you need the exact set of label names and label values on the given time range, then send queries
|
||||
to [/api/v1/query](https://docs.victoriametrics.com/keyConcepts.html#instant-query) or to [/api/v1/query_range](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
|
||||
VictoriaMetrics accepts `limit` query arg at [/api/v1/series](https://docs.victoriametrics.com/url-examples.html#apiv1series)
|
||||
for limiting the number of returned entries. For example, the query to `/api/v1/series?limit=5` returns a sample of up to 5 series, while ignoring the rest of series.
|
||||
If the provided `limit` value exceeds the corresponding `-search.maxSeries` command-line flag values, then limits specified in the command-line flags are used.
|
||||
|
||||
Additionally, VictoriaMetrics provides the following handlers:
|
||||
|
||||
@@ -727,6 +814,20 @@ Additionally, VictoriaMetrics provides the following handlers:
|
||||
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
|
||||
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
|
||||
|
||||
### Timestamp formats
|
||||
|
||||
VictoriaMetrics accepts the following formats for `time`, `start` and `end` query args
|
||||
in [query APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and
|
||||
in [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series).
|
||||
|
||||
- Unix timestamps in seconds with optional milliseconds after the point. For example, `1562529662.678`.
|
||||
- [RFC3339](https://www.ietf.org/rfc/rfc3339.txt). For example, `2022-03-29T01:02:03Z` or `2022-03-29T01:02:03+02:30`.
|
||||
- Partial RFC3339. Examples: `2022`, `2022-03`, `2022-03-29`, `2022-03-29T01`, `2022-03-29T01:02`, `2022-03-29T01:02:03`.
|
||||
The partial RFC3339 time is in UTC timezone by default. It is possible to specify timezone there by adding `+hh:mm` or `-hh:mm` suffix to partial time.
|
||||
For example, `2022-03-01+06:30` is `2022-03-01` at `06:30` timezone.
|
||||
- Relative duration comparing to the current time. For example, `1h5m`, `-1h5m` or `now-1h5m` means `one hour and five minutes ago`, while `now` means `now`.
|
||||
|
||||
|
||||
## Graphite API usage
|
||||
|
||||
VictoriaMetrics supports data ingestion in Graphite protocol - see [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details.
|
||||
@@ -746,10 +847,10 @@ VictoriaMetrics supports `__graphite__` pseudo-label for filtering time series w
|
||||
|
||||
### Graphite Render API usage
|
||||
|
||||
[VictoriaMetrics Enterprise](https://docs.victoriametrics.com/enterprise.html) supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
|
||||
VictoriaMetrics supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
|
||||
at `/render` endpoint, which is used by [Graphite datasource in Grafana](https://grafana.com/docs/grafana/latest/datasources/graphite/).
|
||||
When configuring Graphite datasource in Grafana, the `Storage-Step` http request header must be set to a step between Graphite data points stored in VictoriaMetrics. For example, `Storage-Step: 10s` would mean 10 seconds distance between Graphite datapoints stored in VictoriaMetrics.
|
||||
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
When configuring Graphite datasource in Grafana, the `Storage-Step` http request header must be set to a step between Graphite data points
|
||||
stored in VictoriaMetrics. For example, `Storage-Step: 10s` would mean 10 seconds distance between Graphite datapoints stored in VictoriaMetrics.
|
||||
|
||||
### Graphite Metrics API usage
|
||||
|
||||
@@ -761,7 +862,7 @@ VictoriaMetrics supports the following handlers from [Graphite Metrics API](http
|
||||
|
||||
VictoriaMetrics accepts the following additional query args at `/metrics/find` and `/metrics/expand`:
|
||||
|
||||
* `label` - for selecting arbitrary label values. By default `label=__name__`, i.e. metric names are selected.
|
||||
* `label` - for selecting arbitrary label values. By default, `label=__name__`, i.e. metric names are selected.
|
||||
* `delimiter` - for using different delimiters in metric name hierarchy. For example, `/metrics/find?delimiter=_&query=node_*` would return all the metric name prefixes
|
||||
that start with `node_`. By default `delimiter=.`.
|
||||
|
||||
@@ -887,7 +988,7 @@ Note that background merges may never occur for data from previous months, so st
|
||||
In this case [forced merge](#forced-merge) may help freeing up storage space.
|
||||
|
||||
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
|
||||
before actually deleting the metrics. By default this query will only scan series in the past 5 minutes, so you may need to
|
||||
before actually deleting the metrics. By default, this query will only scan series in the past 5 minutes, so you may need to
|
||||
adjust `start` and `end` to a suitable range to achieve match hits.
|
||||
|
||||
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
|
||||
@@ -946,12 +1047,13 @@ Each JSON line contains samples for a single time series. An example output:
|
||||
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
Optional `max_rows_per_line` arg may be added to the request for limiting the maximum number of rows exported per each JSON line.
|
||||
@@ -994,12 +1096,13 @@ where:
|
||||
* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to export.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
||||
@@ -1021,12 +1124,13 @@ wget -O- -q 'http://your_victoriametrics_instance:8428/api/v1/series/count' | jq
|
||||
# relaunch victoriametrics with search.maxExportSeries more than value from previous command
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
The exported data can be imported to VictoriaMetrics via [/api/v1/import/native](#how-to-import-data-in-native-format).
|
||||
@@ -1037,7 +1141,9 @@ The [deduplication](#deduplication) isn't applied for the data exported in nativ
|
||||
|
||||
## How to import time series data
|
||||
|
||||
Time series data can be imported into VictoriaMetrics via any supported data ingestion protocol:
|
||||
VictoriaMetrics can discover and scrape metrics from Prometheus-compatible targets (aka "pull" protocol) -
|
||||
see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
Additionally, VictoriaMetrics can accept metrics via the following popular data ingestion protocols (aka "push" protocols):
|
||||
|
||||
* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). See [these docs](#prometheus-setup) for details.
|
||||
* DataDog `submit metrics` API. See [these docs](#how-to-send-data-from-datadog-agent) for details.
|
||||
@@ -1248,7 +1354,8 @@ Example contents for `-relabelConfig` file:
|
||||
VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling.
|
||||
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
|
||||
|
||||
The relabeling can be debugged at `http://victoriametrics:8428/metric-relabel-debug` page.
|
||||
The relabeling can be debugged at `http://victoriametrics:8428/metric-relabel-debug` page
|
||||
or at our [public playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/relabeling).
|
||||
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabel-debug) for more details.
|
||||
|
||||
|
||||
@@ -1258,11 +1365,12 @@ VictoriaMetrics exports [Prometheus-compatible federation data](https://promethe
|
||||
at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
|
||||
`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
By default, the last point on the interval `[now - max_lookback ... now]` is scraped for each time series. The default value for `max_lookback` is `5m` (5 minutes), but it can be overridden with `max_lookback` query arg.
|
||||
For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
|
||||
@@ -1294,7 +1402,7 @@ See also [resource usage limits docs](#resource-usage-limits).
|
||||
|
||||
## Resource usage limits
|
||||
|
||||
By default VictoriaMetrics is tuned for an optimal resource usage under typical workloads. Some workloads may need fine-grained resource usage limits. In these cases the following command-line flags may be useful:
|
||||
By default, VictoriaMetrics is tuned for an optimal resource usage under typical workloads. Some workloads may need fine-grained resource usage limits. In these cases the following command-line flags may be useful:
|
||||
|
||||
- `-memory.allowedPercent` and `-memory.allowedBytes` limit the amounts of memory, which may be used for various internal caches at VictoriaMetrics. Note that VictoriaMetrics may use more memory, since these flags don't limit additional memory, which may be needed on a per-query basis.
|
||||
- `-search.maxMemoryPerQuery` limits the amounts of memory, which can be used for processing a single query. Queries, which need more memory, are rejected. Heavy queries, which select big number of time series, may exceed the per-query memory limit by a small percent. The total memory limit for concurrently executed queries can be estimated as `-search.maxMemoryPerQuery` multiplied by `-search.maxConcurrentRequests`.
|
||||
@@ -1305,6 +1413,7 @@ By default VictoriaMetrics is tuned for an optimal resource usage under typical
|
||||
- `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
|
||||
- `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
- `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
|
||||
- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
|
||||
- `-search.maxSeries` limits the number of time series, which may be returned from [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxSeries` to quite low value in order limit CPU and memory usage.
|
||||
- `-search.maxTagKeys` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage.
|
||||
- `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
|
||||
@@ -1382,12 +1491,14 @@ can be configured with the `-inmemoryDataFlushInterval` command-line flag (note
|
||||
In-memory parts are persisted to disk into `part` directories under the `<-storageDataPath>/data/small/YYYY_MM/` folder,
|
||||
where `YYYY_MM` is the month partition for the stored data. For example, `2022_11` is the partition for `parts`
|
||||
with [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) from `November 2022`.
|
||||
Each partition directory contains `parts.json` file with the actual list of parts in the partition.
|
||||
|
||||
The `part` directory has the following name pattern: `rowsCount_blocksCount_minTimestamp_maxTimestamp`, where:
|
||||
Every `part` directory contains `metadata.json` file with the following fields:
|
||||
|
||||
- `rowsCount` - the number of [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) stored in the part
|
||||
- `blocksCount` - the number of blocks stored in the part (see details about blocks below)
|
||||
- `minTimestamp` and `maxTimestamp` - minimum and maximum timestamps across raw samples stored in the part
|
||||
- `RowsCount` - the number of [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) stored in the part
|
||||
- `BlocksCount` - the number of blocks stored in the part (see details about blocks below)
|
||||
- `MinTimestamp` and `MaxTimestamp` - minimum and maximum timestamps across raw samples stored in the part
|
||||
- `MinDedupInterval` - the [deduplication interval](#deduplication) applied to the given part.
|
||||
|
||||
Each `part` consists of `blocks` sorted by internal time series id (aka `TSID`).
|
||||
Each `block` contains up to 8K [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples),
|
||||
@@ -1409,11 +1520,10 @@ for fast block lookups, which belong to the given `TSID` and cover the given tim
|
||||
and [freeing up disk space for the deleted time series](#how-to-delete-time-series) are performed during the merge
|
||||
|
||||
Newly added `parts` either successfully appear in the storage or fail to appear.
|
||||
The newly added `parts` are being created in a temporary directory under `<-storageDataPath>/data/{small,big}/YYYY_MM/tmp` folder.
|
||||
When the newly added `part` is fully written and [fsynced](https://man7.org/linux/man-pages/man2/fsync.2.html)
|
||||
to a temporary directory, then it is atomically moved to the storage directory.
|
||||
Thanks to this alogrithm, storage never contains partially created parts, even if hardware power off
|
||||
occurrs in the middle of writing the `part` to disk - such incompletely written `parts`
|
||||
The newly added `part` is atomically registered in the `parts.json` file under the corresponding partition
|
||||
after it is fully written and [fsynced](https://man7.org/linux/man-pages/man2/fsync.2.html) to the storage.
|
||||
Thanks to this algorithm, storage never contains partially created parts, even if hardware power off
|
||||
occurs in the middle of writing the `part` to disk - such incompletely written `parts`
|
||||
are automatically deleted on the next VictoriaMetrics start.
|
||||
|
||||
The same applies to merge process — `parts` are either fully merged into a new `part` or fail to merge,
|
||||
@@ -1426,8 +1536,8 @@ This increases overhead during data querying, since VictoriaMetrics needs to rea
|
||||
bigger number of parts per each request. That's why it is recommended to have at least 20%
|
||||
of free disk space under directory pointed by `-storageDataPath` command-line flag.
|
||||
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176).
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/).
|
||||
See more details in [monitoring docs](#monitoring).
|
||||
|
||||
See [this article](https://valyala.medium.com/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for more details.
|
||||
@@ -1440,15 +1550,14 @@ Retention is configured with the `-retentionPeriod` command-line flag, which tak
|
||||
|
||||
Data is split in per-month partitions inside `<-storageDataPath>/data/{small,big}` folders.
|
||||
Data partitions outside the configured retention are deleted on the first day of the new month.
|
||||
Each partition consists of one or more data parts with the following name pattern `rowsCount_blocksCount_minTimestamp_maxTimestamp`.
|
||||
Data parts outside of the configured retention are eventually deleted during
|
||||
Each partition consists of one or more data parts. Data parts outside the configured retention are eventually deleted during
|
||||
[background merge](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
|
||||
The maximum disk space usage for a given `-retentionPeriod` is going to be (`-retentionPeriod` + 1) months.
|
||||
For example, if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
|
||||
|
||||
Please note, the time range covered by data part is not limited by retention period unit. Hence, data part may contain data
|
||||
for multiple days and will be deleted only when fully outside of the configured retention.
|
||||
for multiple days and will be deleted only when fully outside the configured retention.
|
||||
|
||||
It is safe to extend `-retentionPeriod` on existing data. If `-retentionPeriod` is set to a lower
|
||||
value than before, then data outside the configured period will be eventually deleted.
|
||||
@@ -1492,7 +1601,7 @@ For example, the following config sets 3 days retention for time series with `te
|
||||
|
||||
Important notes:
|
||||
|
||||
- The data outside of the configured retention isn't deleted instantly - it is deleted eventually during [background merges](https://docs.victoriametrics.com/#storage).
|
||||
- The data outside the configured retention isn't deleted instantly - it is deleted eventually during [background merges](https://docs.victoriametrics.com/#storage).
|
||||
- The `-retentionFilter` doesn't remove old data from `indexdb` (aka inverted index) until the configured [-retentionPeriod](#retention).
|
||||
So the `indexdb` size can grow big under [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)
|
||||
even for small retentions configured via `-retentionFilter`.
|
||||
@@ -1565,7 +1674,9 @@ VictoriaMetrics provides the following security-related command-line flags:
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
|
||||
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
|
||||
See also [security recommendation for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#security)
|
||||
and [the general security page at VictoriaMetrics website](https://victoriametrics.com/security/).
|
||||
|
||||
|
||||
## Tuning
|
||||
|
||||
@@ -1594,8 +1705,8 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
|
||||
set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
|
||||
with 10 seconds interval.
|
||||
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/dashboards/10229)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176) VictoriaMetrics.
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
|
||||
See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
|
||||
created by community.
|
||||
|
||||
@@ -1832,7 +1943,14 @@ are added to all the metrics before sending them to the remote storage:
|
||||
|
||||
## Cache removal
|
||||
|
||||
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
|
||||
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown
|
||||
(e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup.
|
||||
Sometimes it is needed to remove such caches on the next startup. This can be done in the following ways:
|
||||
|
||||
- By manually removing the `<-storageDataPath>/cache` directory when VictoriaMetrics is stopped.
|
||||
- By placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics.
|
||||
In this case VictoriaMetrics will automatically remove all the caches on the next start.
|
||||
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
|
||||
|
||||
## Cache tuning
|
||||
|
||||
@@ -1844,8 +1962,8 @@ The following metrics for each type of cache are exported at [`/metrics` page](#
|
||||
* `vm_cache_misses_total` - the number of cache misses
|
||||
* `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
then cache efficiency is already very high and does not need any tuning.
|
||||
@@ -1937,9 +2055,9 @@ Enterprise binaries can be downloaded and evaluated for free from [the releases
|
||||
A single-node VictoriaMetrics is capable of proxying requests to [vmalert](https://docs.victoriametrics.com/vmalert.html)
|
||||
when `-vmalert.proxyURL` flag is set. Use this feature for the following cases:
|
||||
* for proxying requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* for accessing vmalert's UI through single-node VictoriaMetrics Web interface.
|
||||
* for accessing vmalerts UI through single-node VictoriaMetrics Web interface.
|
||||
|
||||
For accessing vmalert's UI through single-node VictoriaMetrics configure `-vmalert.proxyURL` flag and visit
|
||||
For accessing vmalerts UI through single-node VictoriaMetrics configure `-vmalert.proxyURL` flag and visit
|
||||
`http://<victoriametrics-addr>:8428/vmalert/` link.
|
||||
|
||||
## Benchmarks
|
||||
@@ -1984,17 +2102,10 @@ It is safe sharing the collected profiles from security point of view, since the
|
||||
|
||||
## Integrations
|
||||
|
||||
* [Helm charts for single-node and cluster versions of VictoriaMetrics](https://github.com/VictoriaMetrics/helm-charts).
|
||||
* [Kubernetes operator for VictoriaMetrics](https://github.com/VictoriaMetrics/operator).
|
||||
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
|
||||
See [this example](https://github.com/go-graphite/carbonapi/blob/main/cmd/carbonapi/carbonapi.example.victoriametrics.yaml).
|
||||
* [Ansible role for installing cluster VictoriaMetrics (by VictoriaMetrics)](https://github.com/VictoriaMetrics/ansible-playbooks).
|
||||
* [Ansible role for installing cluster VictoriaMetrics (by community)](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
|
||||
* [Ansible role for installing single-node VictoriaMetrics (by community)](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
|
||||
* [Snap package for VictoriaMetrics](https://snapcraft.io/victoriametrics).
|
||||
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [vmalert-cli](https://github.com/aorfanos/vmalert-cli) - a CLI application for managing [vmalert](https://docs.victoriametrics.com/vmalert.html).
|
||||
|
||||
## Third-party contributions
|
||||
@@ -2077,7 +2188,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
|
||||
```
|
||||
-bigMergeConcurrency int
|
||||
The maximum number of CPU cores to use for big merges. Default value is used if set to 0
|
||||
Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-configAuthKey string
|
||||
@@ -2086,7 +2197,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-dedup.minScrapeInterval duration
|
||||
@@ -2094,16 +2205,16 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-deleteAuthKey string
|
||||
authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series and /tags/delSeries
|
||||
-denyQueriesOutsideRetention
|
||||
Whether to deny queries outside of the configured -retentionPeriod. When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. This may be useful when multiple data sources with distinct retentions are hidden behind query-tee
|
||||
Whether to deny queries outside the configured -retentionPeriod. When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. This may be useful when multiple data sources with distinct retentions are hidden behind query-tee
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-downsampling.period array
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-dryRun
|
||||
Whether to check only -promscrape.config and then exit. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
|
||||
Whether to check config files without running VictoriaMetrics. The following config files are checked: -promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
@@ -2119,15 +2230,17 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-forceMergeAuthKey string
|
||||
authKey, which must be passed in query string to /internal/force_merge pages
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
|
||||
-graphiteListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default compression is enabled to save network bandwidth
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -2141,32 +2254,42 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8428")
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8428")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-influx.maxLineSize size
|
||||
The maximum size in bytes for a single InfluxDB line during parsing
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
|
||||
-influxDBLabel string
|
||||
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write . See also -influxListenAddr.useProxyProtocol
|
||||
-influxListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
|
||||
-influxSkipSingleField
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-inmemoryDataFlushInterval duration
|
||||
The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-logNewSeries
|
||||
Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
|
||||
-loggerDisableTimestamps
|
||||
@@ -2175,6 +2298,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -2184,30 +2309,34 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
-maxLabelValueLen int
|
||||
The maximum length of label values in the accepted time series. Longer label values are truncated. In this case the vm_too_long_label_values_total metric at /metrics page is incremented (default 16384)
|
||||
-maxLabelsPerTimeseries int
|
||||
The maximum number of labels accepted per time series. Superfluous labels are dropped. In this case the vm_metrics_with_dropped_labels_total metric at /metrics page is incremented (default 30)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
|
||||
-opentsdbListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
The maximum size of OpenTSDB HTTP put request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
-opentsdbhttpTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-pprofAuthKey string
|
||||
@@ -2219,9 +2348,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.azureSDCheckInterval duration
|
||||
Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
|
||||
-promscrape.cluster.memberNum string
|
||||
The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
|
||||
The number of number in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
|
||||
-promscrape.cluster.membersCount int
|
||||
The number of members in a cluster of scrapers. Each member must have an unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
-promscrape.cluster.name string
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679
|
||||
-promscrape.cluster.replicationFactor int
|
||||
@@ -2233,7 +2362,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.config.strictParse
|
||||
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in '-promscrape.config' file. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
@@ -2241,9 +2370,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.digitaloceanSDCheckInterval duration
|
||||
Interval for checking for changes in digital ocean. This works only if digitalocean_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#digitalocean_sd_configs for details (default 1m0s)
|
||||
-promscrape.disableCompression
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control
|
||||
-promscrape.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
-promscrape.discovery.concurrency int
|
||||
The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100)
|
||||
-promscrape.discovery.concurrentWaitTime duration
|
||||
@@ -2270,25 +2399,31 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
Interval for checking for changes in kuma service discovery. This works only if kuma_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kuma_sd_configs for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxResponseHeadersSize size
|
||||
The maximum size of http response headers from Prometheus scrape targets
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 4096)
|
||||
-promscrape.maxScrapeSize size
|
||||
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16777216)
|
||||
-promscrape.minResponseSizeForStreamParse size
|
||||
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
|
||||
-promscrape.noStaleMarkers
|
||||
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
|
||||
-promscrape.nomad.waitTime duration
|
||||
Wait time used by Nomad service discovery. Default value is used if not set
|
||||
-promscrape.nomadSDCheckInterval duration
|
||||
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
|
||||
-promscrape.seriesLimitPerTarget int
|
||||
Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info
|
||||
-promscrape.streamParse
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control
|
||||
-promscrape.suppressDuplicateScrapeTargetErrors
|
||||
Whether to suppress 'duplicate scrape target' errors; see https://docs.victoriametrics.com/vmagent.html#troubleshooting for details
|
||||
-promscrape.suppressScrapeErrors
|
||||
@@ -2303,7 +2438,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-relabelConfig string
|
||||
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
|
||||
@@ -2322,13 +2457,16 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.disableCache
|
||||
Whether to disable response caching. This may be useful during data backfilling
|
||||
-search.graphiteMaxPointsPerSeries int
|
||||
The maximum number of points per series Graphite render API can return. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 1000000)
|
||||
The maximum number of points per series Graphite render API can return (default 1000000)
|
||||
-search.graphiteStorageStep duration
|
||||
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 10s)
|
||||
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API (default 10s)
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. Too small value can result in incomplete last points for query results (default 30s)
|
||||
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logQueryMemoryUsage size
|
||||
Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging (default 5s)
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
-search.maxConcurrentRequests int
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 8)
|
||||
-search.maxExportDuration duration
|
||||
@@ -2338,12 +2476,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.maxFederateSeries int
|
||||
The maximum number of time series, which can be returned from /federate. This option allows limiting memory usage (default 1000000)
|
||||
-search.maxGraphiteSeries int
|
||||
The maximum number of time series, which can be scanned during queries to Graphite Render API. See https://docs.victoriametrics.com/#graphite-render-api-usage . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 300000)
|
||||
The maximum number of time series, which can be scanned during queries to Graphite Render API. See https://docs.victoriametrics.com/#graphite-render-api-usage (default 300000)
|
||||
-search.maxLookback duration
|
||||
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaining due to historical reasons
|
||||
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaning due to historical reasons
|
||||
-search.maxMemoryPerQuery size
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . See also -search.logQueryMemoryUsage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.maxPointsPerTimeseries int
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
|
||||
-search.maxPointsSubqueryPerTimeseries int
|
||||
@@ -2352,7 +2490,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum duration for query execution (default 30s)
|
||||
-search.maxQueryLen size
|
||||
The maximum search query length in bytes
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
-search.maxQueueDuration duration
|
||||
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
|
||||
-search.maxSamplesPerQuery int
|
||||
@@ -2361,8 +2499,10 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage (default 30000000)
|
||||
-search.maxSeries int
|
||||
The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 30000)
|
||||
-search.maxSeriesPerAggrFunc int
|
||||
The maximum number of time series an aggregate MetricsQL function can generate (default 1000000)
|
||||
-search.maxStalenessInterval duration
|
||||
The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag
|
||||
The maximum interval for staleness calculations. By default, it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag
|
||||
-search.maxStatusRequestDuration duration
|
||||
The maximum duration for /api/v1/status/* requests (default 5m0s)
|
||||
-search.maxStepForPointsAdjustment duration
|
||||
@@ -2398,9 +2538,11 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-selfScrapeJob string
|
||||
Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
|
||||
-smallMergeConcurrency int
|
||||
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
|
||||
The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
|
||||
-snapshotAuthKey string
|
||||
authKey, which must be passed in query string to /snapshot* pages
|
||||
-snapshotCreateTimeout duration
|
||||
The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period
|
||||
-snapshotsMaxAge value
|
||||
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
|
||||
@@ -2408,25 +2550,31 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBTagFilters size
|
||||
Overrides max size for indexdb/tagFiltersToMetricIDs cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeStorageTSID size
|
||||
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.maxDailySeries int
|
||||
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxHourlySeries
|
||||
-storage.maxHourlySeries int
|
||||
The maximum number of unique series can be added to the storage during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxDailySeries
|
||||
-storage.minFreeDiskSpaceBytes size
|
||||
The minimum free disk space at -storageDataPath after which the storage stops accepting new data
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 10000000)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)
|
||||
-storageDataPath string
|
||||
Path to storage data (default "victoria-metrics-data")
|
||||
-streamAggr.config string
|
||||
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -streamAggr.dedupInterval
|
||||
-streamAggr.dedupInterval duration
|
||||
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
|
||||
-streamAggr.keepInput
|
||||
Whether to keep input samples after the aggregation with -streamAggr.config. By default, the input is dropped after the aggregation, so only the aggregate data is stored. See https://docs.victoriametrics.com/stream-aggregation.html
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
@@ -2444,4 +2592,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Show VictoriaMetrics version
|
||||
-vmalert.proxyURL string
|
||||
Optional URL for proxying requests to vmalert. For example, if -vmalert.proxyURL=http://vmalert:8880 , then alerting API requests such as /api/v1/rules from Grafana will be proxied to http://vmalert:8880/api/v1/rules
|
||||
-vmui.customDashboardsPath string
|
||||
Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
|
||||
```
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
|
||||
| Version | Supported |
|
||||
|---------|--------------------|
|
||||
| 1.81.x | :white_check_mark: |
|
||||
| 1.80.x | :x: |
|
||||
| 1.79.x | :white_check_mark: |
|
||||
| < 1.78 | :x: |
|
||||
| [latest release](https://docs.victoriametrics.com/CHANGELOG.html) | :white_check_mark: |
|
||||
| v1.87.x LTS release | :white_check_mark: |
|
||||
| v1.79.x LTS release | :white_check_mark: |
|
||||
| other releases | :x: |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
|
||||
@@ -39,6 +39,9 @@ victoria-metrics-freebsd-amd64-prod:
|
||||
victoria-metrics-openbsd-amd64-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-openbsd-amd64
|
||||
|
||||
victoria-metrics-windows-amd64-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
package-victoria-metrics:
|
||||
APP_NAME=victoria-metrics $(MAKE) package-via-docker
|
||||
|
||||
@@ -82,6 +85,9 @@ victoria-metrics-linux-arm64:
|
||||
victoria-metrics-linux-ppc64le:
|
||||
APP_NAME=victoria-metrics CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
victoria-metrics-linux-s390x:
|
||||
APP_NAME=victoria-metrics CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
victoria-metrics-linux-386:
|
||||
APP_NAME=victoria-metrics CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
@@ -97,6 +103,9 @@ victoria-metrics-freebsd-amd64:
|
||||
victoria-metrics-openbsd-amd64:
|
||||
APP_NAME=victoria-metrics CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
victoria-metrics-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=victoria-metrics $(MAKE) app-local-windows-goarch
|
||||
|
||||
victoria-metrics-pure:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-local-pure
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||
vminsertcommon "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
vminsertrelabel "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
@@ -24,11 +26,15 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only -promscrape.config and then exit. "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running VictoriaMetrics. The following config files are checked: "+
|
||||
"-promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. "+
|
||||
"This can be changed with -promscrape.config.strictParse=false command-line flag")
|
||||
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
|
||||
"The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+
|
||||
"Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
|
||||
@@ -51,7 +57,13 @@ func main() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
|
||||
if err := vminsertrelabel.CheckRelabelConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -relabelConfig: %s", err)
|
||||
}
|
||||
if err := vminsertcommon.CheckStreamAggrConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -streamAggr.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exiting with 0 status code")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -64,7 +76,7 @@ func main() {
|
||||
vminsert.Init()
|
||||
startSelfScraper()
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
@@ -90,7 +102,7 @@ func main() {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
@@ -102,6 +114,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"targets", "status for discovered active targets"},
|
||||
{"service-discovery", "labels before and after relabeling for discovered targets"},
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"expand-with-exprs", "WITH expressions' tutorial"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"metrics", "available service metrics"},
|
||||
|
||||
@@ -129,10 +129,10 @@ func setUp() {
|
||||
storagePath = filepath.Join(os.TempDir(), testStorageSuffix)
|
||||
processFlags()
|
||||
logger.Init()
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, false, requestHandler)
|
||||
readyStorageCheckFunc := func() bool {
|
||||
resp, err := http.Get(testHealthHTTPPath)
|
||||
if err != nil {
|
||||
@@ -189,10 +189,8 @@ func tearDown() {
|
||||
|
||||
func TestWriteRead(t *testing.T) {
|
||||
t.Run("write", testWrite)
|
||||
vmstorage.Storage.DebugFlush()
|
||||
time.Sleep(1 * time.Second)
|
||||
vmstorage.Stop()
|
||||
// open storage after stop in write
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
t.Run("read", testRead)
|
||||
}
|
||||
|
||||
@@ -261,7 +259,7 @@ func testRead(t *testing.T) {
|
||||
for _, q := range test.Query {
|
||||
q = testutil.PopulateTimeTplString(q, insertionTime)
|
||||
if test.Issue != "" {
|
||||
test.Issue = "Regression in " + test.Issue
|
||||
test.Issue = "\nRegression in " + test.Issue
|
||||
}
|
||||
switch true {
|
||||
case strings.HasPrefix(q, "/api/v1/export"):
|
||||
@@ -284,7 +282,7 @@ func testRead(t *testing.T) {
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error %s.%s", q, err, test.Issue)
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -12,7 +12,7 @@ func TestPopulateTimeTplString(t *testing.T) {
|
||||
}
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
result := PopulateTimeTplString(s, now)
|
||||
result := PopulateTimeTplString(s, now.UTC())
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"forms_daily_count;item=x 2 {TIME_S-2m}",
|
||||
"forms_daily_count;item=y 3 {TIME_S-1m}",
|
||||
"forms_daily_count;item=y 4 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","2"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","4"]}]}
|
||||
|
||||
@@ -85,6 +85,9 @@ vmagent-linux-arm64:
|
||||
vmagent-linux-ppc64le:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmagent-linux-s390x:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmagent-linux-386:
|
||||
APP_NAME=vmagent CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ See [Quick Start](#quick-start) for details.
|
||||
|
||||
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters into VictoriaMetrics.
|
||||
Also, we found that our user's infrastructure are like snowflakes in that no two are alike. Therefore we decided to add more flexibility
|
||||
Also, we found that our user's infrastructure are like snowflakes in that no two are alike. Therefore, we decided to add more flexibility
|
||||
to `vmagent` such as the ability to [accept metrics via popular push protocols](#how-to-push-data-to-vmagent)
|
||||
additionally to [discovering Prometheus-compatible targets and scraping metrics from them](#how-to-collect-metrics-in-prometheus-format).
|
||||
|
||||
@@ -24,8 +24,10 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
||||
see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Can accept data via all the ingestion protocols supported by VictoriaMetrics - see [these docs](#how-to-push-data-to-vmagent).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems -
|
||||
see [these docs](#replication-and-high-availability).
|
||||
* Can aggregate incoming samples by time and by labels before sending them to remote storage - see [these docs](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
* Can replicate collected metrics simultaneously to multiple Prometheus-compatible remote storage systems - see [these docs](#replication-and-high-availability).
|
||||
* Can save egress network bandwidth usage costs when [VictoriaMetrics remote write protocol](#victoriametrics-remote-write-protocol)
|
||||
is used for sending the data to VictoriaMetrics.
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
@@ -45,16 +47,26 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
||||
|
||||
Please download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) (
|
||||
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags)),
|
||||
unpack it and pass the following flags to the `vmagent` binary in order to start scraping Prometheus-compatible targets:
|
||||
unpack it and pass the following flags to the `vmagent` binary in order to start scraping Prometheus-compatible targets
|
||||
and sending the data to the Prometheus-compatible remote storage:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (usually located at `/etc/prometheus/prometheus.yml`).
|
||||
* `-promscrape.config` with the path to [Prometheus config file](https://docs.victoriametrics.com/sd_configs.html) (usually located at `/etc/prometheus/prometheus.yml`).
|
||||
The path can point either to local file or to http url. `vmagent` doesn't support some sections of Prometheus config file,
|
||||
so you may need either to delete these sections or to run `vmagent` with `-promscrape.config.strictParse=false` command-line flag.
|
||||
In this case `vmagent` ignores unsupported sections. See [the list of unsupported sections](#unsupported-prometheus-config-sections).
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics, the `-remoteWrite.url` argument can be specified
|
||||
multiple times to replicate data concurrently to an arbitrary number of remote storage systems. See [various use cases](#use-cases).
|
||||
* `-remoteWrite.url` with Prometheus-compatible remote storage endpoint such as VictoriaMetrics.
|
||||
|
||||
Example command line:
|
||||
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vmagent)
|
||||
to [single-node VictoriaMetrics](https://docs.victoriametrics.com/) located at `victoria-metrics-host:8428`:
|
||||
|
||||
```console
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) if you need writing
|
||||
the data to [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
|
||||
Example command for scraping Prometheus targets and writing the data to single-node VictoriaMetrics:
|
||||
|
||||
```console
|
||||
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
@@ -65,16 +77,12 @@ See [how to scrape Prometheus-compatible targets](#how-to-collect-metrics-in-pro
|
||||
If you use single-node VictoriaMetrics, then you can discover and scrape Prometheus-compatible targets directly from VictoriaMetrics
|
||||
without the need to use `vmagent` - see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
|
||||
If you don't need to scrape Prometheus-compatible targets, then the `-promscrape.config` option isn't needed.
|
||||
For example, the following command is sufficient for accepting data via [supported push-based protocols](#how-to-push-data-to-vmagent)
|
||||
and sending it to the provided `-remoteWrite.url`:
|
||||
|
||||
```console
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
`vmagent` can save network bandwidth usage costs under high load when [VictoriaMetrics remote write protocol is used](#victoriametrics-remote-write-protocol).
|
||||
|
||||
See [troubleshooting docs](#troubleshooting) if you encounter common issues with `vmagent`.
|
||||
|
||||
See [various use cases](#use-cases) for vmagent.
|
||||
|
||||
Pass `-help` to `vmagent` in order to see [the full list of supported command-line flags with their descriptions](#advanced-usage).
|
||||
|
||||
## How to push data to vmagent
|
||||
@@ -96,7 +104,7 @@ additionally to pull-based Prometheus-compatible targets' scraping:
|
||||
|
||||
`vmagent` should be restarted in order to update config options set via command-line args.
|
||||
`vmagent` supports multiple approaches for reloading configs from updated config files such as
|
||||
`-promscrape.config`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`:
|
||||
`-promscrape.config`, `-remoteWrite.relabelConfig`, `-remoteWrite.urlRelabelConfig` and `-remoteWrite.streamAggr.config`:
|
||||
|
||||
* Sending `SIGHUP` signal to `vmagent` process:
|
||||
|
||||
@@ -118,7 +126,8 @@ data to the remote storage. It re-tries sending the data to remote storage until
|
||||
The maximum on-disk size for the buffered metrics can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
`vmagent` works on various architectures from the IoT world - 32-bit arm, 64-bit arm, ppc64, 386, amd64.
|
||||
See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/Makefile) for details.
|
||||
|
||||
The `vmagent` can save network bandwidth usage costs by using [VictoriaMetrics remote write protocol](#victoriametrics-remote-write-protocol).
|
||||
|
||||
### Drop-in replacement for Prometheus
|
||||
|
||||
@@ -126,6 +135,12 @@ If you use Prometheus only for scraping metrics from various targets and forward
|
||||
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
### Statsd alternative
|
||||
|
||||
`vmagent` can be used as an alternative to [statsd](https://github.com/statsd/statsd)
|
||||
when [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) is enabled.
|
||||
See [these docs](https://docs.victoriametrics.com/stream-aggregation.html#statsd-alternative) for details.
|
||||
|
||||
### Flexible metrics relay
|
||||
|
||||
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](#relabeling)
|
||||
@@ -136,9 +151,14 @@ to other remote storage systems, which support Prometheus `remote_write` protoco
|
||||
|
||||
`vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instance.
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again,
|
||||
and then it sends the buffered data to the remote storage in order to prevent data gaps.
|
||||
|
||||
[VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) already supports replication,
|
||||
so there is no need in specifying multiple `-remoteWrite.url` flags when writing data to the same cluster.
|
||||
See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#replication-and-data-safety).
|
||||
|
||||
|
||||
### Relabeling and filtering
|
||||
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to the remote storage. Additionally,
|
||||
@@ -150,7 +170,7 @@ Please see [these docs](#relabeling) for details.
|
||||
`vmagent` supports splitting the collected data between multiple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For example, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and a real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive it's own subset of the collected data due to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
Note that each destination can receive its own subset of the collected data due to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
|
||||
### Prometheus remote_write proxy
|
||||
|
||||
@@ -162,11 +182,37 @@ Also, Basic Auth can be enabled for the incoming `remote_write` requests with `-
|
||||
### remote_write for clustered version
|
||||
|
||||
While `vmagent` can accept data in several supported protocols (OpenTSDB, Influx, Prometheus, Graphite) and scrape data from various targets,
|
||||
writes are always performed in Promethes remote_write protocol. Therefore for the [clustered version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html),
|
||||
writes are always performed in Prometheus remote_write protocol. Therefore, for the [clustered version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html),
|
||||
the `-remoteWrite.url` command-line flag should be configured as `<schema>://<vminsert-host>:8480/insert/<accountID>/prometheus/api/v1/write`
|
||||
according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format).
|
||||
There is also support for multitenant writes. See [these docs](#multitenancy).
|
||||
|
||||
## VictoriaMetrics remote write protocol
|
||||
|
||||
`vmagent` supports sending data to the configured `-remoteWrite.url` either via Prometheus remote write protocol
|
||||
or via VictoriaMetrics remote write protocol.
|
||||
|
||||
VictoriaMetrics remote write protocol provides the following benefits comparing to Prometheus remote write protocol:
|
||||
|
||||
- Reduced network bandwidth usage by 2x-5x. This allows saving network bandwidth usage costs when `vmagent` and
|
||||
the configured remote storage systems are located in different datacenters, availability zones or regions.
|
||||
|
||||
- Reduced disk read/write IO and disk space usage at `vmagent` when the remote storage is temporarily unavailable.
|
||||
In this case `vmagent` buffers the incoming data to disk using the VictoriaMetrics remote write format.
|
||||
This reduces disk read/write IO and disk space usage by 2x-5x comparing to Prometheus remote write format.
|
||||
|
||||
`vmagent` automatically switches to VictoriaMetrics remote write protocol when it sends data to VictoriaMetrics components such as other `vmagent` instances,
|
||||
[single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
|
||||
or `vminsert` at [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
It is possible to force switch to VictoriaMetrics remote write protocol by specifying `-remoteWrite.forceVMProto`
|
||||
command-line flag for the corresponding `-remoteWrite.url`.
|
||||
It is possible to tune the compression level for VictoriaMetrics remote write protocol with `-remoteWrite.vmProtoCompressLevel` command-line flag.
|
||||
Bigger values reduce network usage at the cost of higher CPU usage. Negative values reduce CPU usage at the cost of higher network usage.
|
||||
|
||||
`vmagent` automatically switches to Prometheus remote write protocol when it sends data to old versions of VictoriaMetrics components
|
||||
or to other Prometheus-compatible remote storage systems. It is possible to force switch to Prometheus remote write protocol
|
||||
by specifying `-remoteWrite.forcePromProto` command-line flag for the corresponding `-remoteWrite.url`.
|
||||
|
||||
## Multitenancy
|
||||
|
||||
By default `vmagent` collects the data without tenant identifiers and routes it to the configured `-remoteWrite.url`.
|
||||
@@ -190,7 +236,7 @@ This allows using a single `vmagent` instance in front of multiple VictoriaMetri
|
||||
If `-remoteWrite.multitenantURL` command-line flag is set and `vmagent` is configured to scrape Prometheus-compatible targets
|
||||
(e.g. if `-promscrape.config` command-line flag is set) then `vmagent` reads tenantID from `__tenant_id__` label
|
||||
for the discovered targets and routes all the metrics from this target to the given `__tenant_id__`,
|
||||
e.g. to the url `<-remoteWrite.multitnenatURL>/insert/<__tenant_id__>/prometheus/api/v1/write`.
|
||||
e.g. to the url `<-remoteWrite.multitenantURL>/insert/<__tenant_id__>/prometheus/api/v1/write`.
|
||||
|
||||
For example, the following relabeling rule instructs sending metrics to tenantID defined in the `prometheus.io/tenant` annotation of Kubernetes pod deployment:
|
||||
|
||||
@@ -236,14 +282,14 @@ scrape_configs:
|
||||
- "My-Auth: TopSecret"
|
||||
```
|
||||
|
||||
* `disable_compression: true` for disabling response compression on a per-job basis. By default `vmagent` requests compressed responses
|
||||
* `disable_compression: true` for disabling response compression on a per-job basis. By default, `vmagent` requests compressed responses
|
||||
from scrape targets for saving network bandwidth.
|
||||
* `disable_keepalive: true` for disabling [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection)
|
||||
on a per-job basis. By default `vmagent` uses keep-alive connections to scrape targets for reducing overhead on connection re-establishing.
|
||||
on a per-job basis. By default, `vmagent` uses keep-alive connections to scrape targets for reducing overhead on connection re-establishing.
|
||||
* `series_limit: N` for limiting the number of unique time series a single scrape target can expose. See [these docs](#cardinality-limiter).
|
||||
* `stream_parse: true` for scraping targets in a streaming manner. This may be useful when targets export big number of metrics. See [these docs](#stream-parsing-mode).
|
||||
* `scrape_align_interval: duration` for aligning scrapes to the given interval instead of using random offset
|
||||
in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps spreading scrapes evenly in time.
|
||||
in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps to spread scrapes evenly in time.
|
||||
* `scrape_offset: duration` for specifying the exact offset for scraping instead of using random offset in the range `[0 ... scrape_interval]`.
|
||||
|
||||
See [scrape_configs docs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) for more details on all the supported options.
|
||||
@@ -292,7 +338,7 @@ There is no need in specifying top-level `scrape_configs` section in these files
|
||||
|
||||
The list of supported service discovery types is available [here](#how-to-collect-metrics-in-prometheus-format).
|
||||
|
||||
Additionally `vmagent` doesn't support `refresh_interval` option at service discovery sections.
|
||||
Additionally, `vmagent` doesn't support `refresh_interval` option at service discovery sections.
|
||||
This option is substituted with `-promscrape.*CheckInterval` command-line options, which are specific per each service discovery type.
|
||||
See [the full list of command-line flags for vmagent](#advanced-usage).
|
||||
|
||||
@@ -316,7 +362,7 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
|
||||
## Automatically generated metrics
|
||||
|
||||
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format)
|
||||
and attaches target-specific `instance` and `job` labels to these metrics:
|
||||
and attaches `instance`, `job` and other target-specific labels to these metrics:
|
||||
|
||||
* `up` - this metric exposes `1` value on successful scrape and `0` value on unsuccessful scrape. This allows monitoring
|
||||
failing scrapes with the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html):
|
||||
@@ -414,7 +460,7 @@ VictoriaMetrics components support [Prometheus-compatible relabeling](https://pr
|
||||
with [additional enhancements](#relabeling-enhancements). The relabeling can be defined in the following places processed by `vmagent`:
|
||||
|
||||
* At the `scrape_config -> relabel_configs` section in `-promscrape.config` file.
|
||||
This relabeling is used for modifying labels in discovered targets and for dropping unneded targets.
|
||||
This relabeling is used for modifying labels in discovered targets and for dropping unneeded targets.
|
||||
See [relabeling cookbook](https://docs.victoriametrics.com/relabeling.html) for details.
|
||||
|
||||
This relabeling can be debugged by clicking the `debug` link at the corresponding target on the `http://vmagent:8429/targets` page
|
||||
@@ -501,7 +547,7 @@ The following articles contain useful information about Prometheus relabeling:
|
||||
* VictoriaMetrics provides the following additional relabeling actions on top of standard actions
|
||||
from the [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config):
|
||||
|
||||
* `replace_all` replaces all of the occurrences of `regex` in the values of `source_labels` with the `replacement`
|
||||
* `replace_all` replaces all the occurrences of `regex` in the values of `source_labels` with the `replacement`
|
||||
and stores the results in the `target_label`. For example, the following relabeling config replaces all the occurrences
|
||||
of `-` char in metric names with `_` char (e.g. `foo-bar-baz` metric name is transformed into `foo_bar_baz`):
|
||||
|
||||
@@ -513,7 +559,7 @@ The following articles contain useful information about Prometheus relabeling:
|
||||
replacement: "_"
|
||||
```
|
||||
|
||||
* `labelmap_all` replaces all of the occurrences of `regex` in all the label names with the `replacement`.
|
||||
* `labelmap_all` replaces all the occurrences of `regex` in all the label names with the `replacement`.
|
||||
For example, the following relabeling config replaces all the occurrences of `-` char in all the label names
|
||||
with `_` char (e.g. `foo-bar-baz` label name is transformed into `foo_bar_baz`):
|
||||
|
||||
@@ -568,7 +614,7 @@ Note that the `name` field must be substituted with explicit `__name__` option u
|
||||
If `__name__` option is missing under `labels` section, then the original Graphite-style metric name is left unchanged.
|
||||
|
||||
For example, the following relabeling rule generates `requests_total{job="app42",instance="host124:8080"}` metric
|
||||
from "app42.host123.requests.total" Graphite-style metric:
|
||||
from `app42.host123.requests.total` Graphite-style metric:
|
||||
|
||||
```yaml
|
||||
- action: graphite
|
||||
@@ -596,23 +642,22 @@ Additionally, the `action: graphite` relabeling rules usually work much faster t
|
||||
`vmagent` and [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
|
||||
provide the following tools for debugging target-level and metric-level relabeling:
|
||||
|
||||
- Target-level relabeling (e.g. `relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs))
|
||||
- Target-level debugging (e.g. `relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs))
|
||||
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
|
||||
and clicking the `debug` link at the target, which must be debugged.
|
||||
The opened page will show step-by-step results for the actual relabeling rules applied to the target labels.
|
||||
and clicking the `debug target relabeling` link at the target, which must be debugged.
|
||||
The opened page shows step-by-step results for the actual target relabeling rules applied to the discovered target labels.
|
||||
The page shows also the target URL generated after applying all the relabeling rules.
|
||||
|
||||
The `http://vmagent:8429/targets` page shows only active targets. If you need to understand why some target
|
||||
is dropped during the relabeling, then navigate to `http://vmagent:8428/service-discovery` page
|
||||
(`http://victoriametrics:8428/service-discovery` for single-node VictoriaMetrics), find the dropped target
|
||||
and click the `debug` link there. The opened page will show step-by-step results for the actual relabeling rules,
|
||||
and click the `debug` link there. The opened page shows step-by-step results for the actual relabeling rules,
|
||||
which result to target drop.
|
||||
|
||||
- Metric-level relabeling (e.g. `metric_relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
|
||||
and all the relabeling, which can be set up via `-relabelConfig`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`
|
||||
command-line flags) can be performed by navigating to `http://vmagent:8429/metric-relabel-debug` page
|
||||
(`http://victoriametrics:8428/metric-relabel-debug` page for single-node VictoriaMetrics)
|
||||
and submitting there relabeling rules together with the metric to be relabeled.
|
||||
The page will show step-by-step results for the entered relabeling rules executed against the entered metric.
|
||||
- Metric-level debugging (e.g. `metric_relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
|
||||
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
|
||||
and clicking the `debug metrics relabeling` link at the target, which must be debugged.
|
||||
The opened page shows step-by-step results for the actual metric relabeling rules applied to the given target labels.
|
||||
|
||||
## Prometheus staleness markers
|
||||
|
||||
@@ -634,7 +679,7 @@ e.g. it sets `scrape_series_added` metric to zero. See [these docs](#automatical
|
||||
|
||||
## Stream parsing mode
|
||||
|
||||
By default `vmagent` reads the full response body from scrape target into memory, then parses it, applies [relabeling](#relabeling)
|
||||
By default, `vmagent` reads the full response body from scrape target into memory, then parses it, applies [relabeling](#relabeling)
|
||||
and then pushes the resulting metrics to the configured `-remoteWrite.url`. This mode works good for the majority of cases
|
||||
when the scrape target exposes small number of metrics (e.g. less than 10 thousand). But this mode may take big amounts of memory
|
||||
when the scrape target exposes big number of metrics. In this case it is recommended enabling stream parsing mode.
|
||||
@@ -662,8 +707,8 @@ scrape_configs:
|
||||
stream_parse: true
|
||||
static_configs:
|
||||
- targets:
|
||||
- big-prometeus1
|
||||
- big-prometeus2
|
||||
- big-prometheus1
|
||||
- big-prometheus2
|
||||
honor_labels: true
|
||||
metrics_path: /federate
|
||||
params:
|
||||
@@ -671,7 +716,7 @@ scrape_configs:
|
||||
```
|
||||
|
||||
Note that `vmagent` in stream parsing mode stores up to `sample_limit` samples to the configured `-remoteStorage.url`
|
||||
instead of droping all the samples read from the target, because the parsed data is sent to the remote storage
|
||||
instead of dropping all the samples read from the target, because the parsed data is sent to the remote storage
|
||||
as soon as it is parsed in stream parsing mode.
|
||||
|
||||
## Scraping big number of targets
|
||||
@@ -691,7 +736,7 @@ spread scrape targets among a cluster of two `vmagent` instances:
|
||||
The `-promscrape.cluster.memberNum` can be set to a StatefulSet pod name when `vmagent` runs in Kubernetes.
|
||||
The pod name must end with a number in the range `0 ... promscrape.cluster.memberNum-1`. For example, `-promscrape.cluster.memberNum=vmagent-0`.
|
||||
|
||||
By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
|
||||
By default, each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
|
||||
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
|
||||
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
|
||||
|
||||
@@ -757,7 +802,8 @@ scrape_configs:
|
||||
|
||||
## Cardinality limiter
|
||||
|
||||
By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced in the following places:
|
||||
By default, `vmagent` doesn't limit the number of time series each scrape target can expose.
|
||||
The limit can be enforced in the following places:
|
||||
|
||||
* Via `-promscrape.seriesLimitPerTarget` command-line option. This limit is applied individually
|
||||
to all the scrape targets defined in the file pointed by `-promscrape.config`.
|
||||
@@ -768,10 +814,7 @@ By default `vmagent` doesn't limit the number of time series each scrape target
|
||||
via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets,
|
||||
which may expose too high number of time series.
|
||||
|
||||
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
|
||||
|
||||
Scraped metrics are dropped for time series exceeding the given limit.
|
||||
|
||||
Scraped metrics are dropped for time series exceeding the given limit on the time window of 24h.
|
||||
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
|
||||
|
||||
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
|
||||
@@ -785,8 +828,9 @@ These metrics allow building the following alerting rules:
|
||||
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
|
||||
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
|
||||
|
||||
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
|
||||
|
||||
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`.
|
||||
By default, `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`.
|
||||
The limit can be enforced by setting the following command-line flags:
|
||||
|
||||
* `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour.
|
||||
@@ -816,7 +860,7 @@ See also [cardinality explorer docs](https://docs.victoriametrics.com/#cardinali
|
||||
We recommend setting up regular scraping of this page either through `vmagent` itself or by Prometheus
|
||||
so that the exported metrics may be analyzed later.
|
||||
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683-victoriametrics-vmagent/) for `vmagent` state overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
|
||||
|
||||
@@ -829,7 +873,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
* `http://vmagent-host:8429/api/v1/targets`. This handler returns JSON response
|
||||
compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets).
|
||||
* `http://vmagent-host:8429/ready`. This handler returns http 200 status code when `vmagent` finishes
|
||||
it's initialization for all the [service_discovery configs](https://docs.victoriametrics.com/sd_configs.html).
|
||||
its initialization for all the [service_discovery configs](https://docs.victoriametrics.com/sd_configs.html).
|
||||
It may be useful to perform `vmagent` rolling update without any scrape loss.
|
||||
|
||||
## Troubleshooting
|
||||
@@ -857,9 +901,9 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
|
||||
* The `/service-discovery` page could be useful for debugging relabeling process for scrape targets.
|
||||
This page contains original labels for targets dropped during relabeling.
|
||||
By default the `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling,
|
||||
By default, the `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling,
|
||||
then increase `-promscrape.maxDroppedTargets` command-line flag value to see all the dropped targets.
|
||||
Note that tracking each dropped target requires up to 10Kb of RAM. Therefore big values for `-promscrape.maxDroppedTargets`
|
||||
Note that tracking each dropped target requires up to 10Kb of RAM. Therefore, big values for `-promscrape.maxDroppedTargets`
|
||||
may result in increased memory usage if a big number of scrape targets are dropped during relabeling.
|
||||
|
||||
* We recommend you increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported
|
||||
@@ -869,13 +913,13 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
|
||||
* If you see gaps in the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set,
|
||||
try increasing `-remoteWrite.queues`. Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage.
|
||||
Therefore it starts dropping the buffered data if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
Therefore, it starts dropping the buffered data if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vmagent` drops data blocks if remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses.
|
||||
The number of dropped blocks can be monitored via `vmagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
|
||||
|
||||
* Use `-remoteWrite.queues=1` when `-remoteWrite.url` points to remote storage, which doesn't accept out-of-order samples (aka data backfilling).
|
||||
Such storage systems include Prometheus, Cortex and Thanos, which typically emit `out of order sample` errors.
|
||||
Such storage systems include Prometheus, Mimir, Cortex and Thanos, which typically emit `out of order sample` errors.
|
||||
The best solution is to use remote storage with [backfilling support](https://docs.victoriametrics.com/#backfilling) such as VictoriaMetrics.
|
||||
|
||||
* `vmagent` buffers scraped data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
@@ -935,7 +979,7 @@ See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting
|
||||
* [Writing metrics to Kafka](#writing-metrics-to-kafka)
|
||||
|
||||
The enterprise version of vmagent is available for evaluation at [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page
|
||||
in `vmutils-...-enteprise.tar.gz` archives and in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
|
||||
in `vmutils-...-enterprise.tar.gz` archives and in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
|
||||
|
||||
### Reading metrics from Kafka
|
||||
|
||||
@@ -983,7 +1027,7 @@ data_format = "influx"
|
||||
|
||||
These command-line flags are available only in [enterprise](https://docs.victoriametrics.com/enterprise.html) version of `vmagent`,
|
||||
which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page
|
||||
(see `vmutils-...-enteprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
|
||||
(see `vmutils-...-enterprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
|
||||
|
||||
```
|
||||
-kafka.consumer.topic array
|
||||
@@ -1119,7 +1163,7 @@ It is safe sharing the collected profiles from security point of view, since the
|
||||
|
||||
## Advanced usage
|
||||
|
||||
`vmagent` can be fine-tuned with various command-line flags. Run `./vmagent -help` in order to see the full list of these flags with their desciptions and default values:
|
||||
`vmagent` can be fine-tuned with various command-line flags. Run `./vmagent -help` in order to see the full list of these flags with their descriptions and default values:
|
||||
|
||||
```
|
||||
./vmagent -help
|
||||
@@ -1136,15 +1180,15 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-dryRun
|
||||
Whether to check only config files without running vmagent. The following files are checked: -promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag
|
||||
Whether to check config files without running vmagent. The following files are checked: -promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
@@ -1154,15 +1198,17 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
|
||||
-graphiteListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default compression is enabled to save network bandwidth
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -1176,30 +1222,40 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr='' (default ":8429")
|
||||
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol (default ":8429")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-influx.maxLineSize size
|
||||
The maximum size in bytes for a single InfluxDB line during parsing
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
|
||||
-influxDBLabel string
|
||||
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . See also -influxListenAddr.useProxyProtocol
|
||||
-influxListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
|
||||
-influxSkipSingleField
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-kafka.consumer.topic array
|
||||
Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1232,6 +1288,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -1241,26 +1299,30 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
|
||||
-opentsdbListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
The maximum size of OpenTSDB HTTP put request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
-opentsdbhttpTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-pprofAuthKey string
|
||||
@@ -1270,9 +1332,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-promscrape.azureSDCheckInterval duration
|
||||
Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
|
||||
-promscrape.cluster.memberNum string
|
||||
The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
|
||||
The number of number in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
|
||||
-promscrape.cluster.membersCount int
|
||||
The number of members in a cluster of scrapers. Each member must have an unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets
|
||||
-promscrape.cluster.name string
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679
|
||||
-promscrape.cluster.replicationFactor int
|
||||
@@ -1284,17 +1346,19 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-promscrape.config.strictParse
|
||||
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in '-promscrape.config' file. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
Interval for checking for changes in Consul. This works only if consul_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#consul_sd_configs for details (default 30s)
|
||||
-promscrape.consulagentSDCheckInterval duration
|
||||
Interval for checking for changes in Consul Agent. This works only if consulagent_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#consulagent_sd_configs for details (default 30s)
|
||||
-promscrape.digitaloceanSDCheckInterval duration
|
||||
Interval for checking for changes in digital ocean. This works only if digitalocean_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#digitalocean_sd_configs for details (default 1m0s)
|
||||
-promscrape.disableCompression
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control
|
||||
-promscrape.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets
|
||||
-promscrape.discovery.concurrency int
|
||||
The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100)
|
||||
-promscrape.discovery.concurrentWaitTime duration
|
||||
@@ -1321,25 +1385,31 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
Interval for checking for changes in kuma service discovery. This works only if kuma_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kuma_sd_configs for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxResponseHeadersSize size
|
||||
The maximum size of http response headers from Prometheus scrape targets
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 4096)
|
||||
-promscrape.maxScrapeSize size
|
||||
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16777216)
|
||||
-promscrape.minResponseSizeForStreamParse size
|
||||
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
|
||||
-promscrape.noStaleMarkers
|
||||
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
|
||||
-promscrape.nomad.waitTime duration
|
||||
Wait time used by Nomad service discovery. Default value is used if not set
|
||||
-promscrape.nomadSDCheckInterval duration
|
||||
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
|
||||
-promscrape.seriesLimitPerTarget int
|
||||
Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info
|
||||
-promscrape.streamParse
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
|
||||
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control
|
||||
-promscrape.suppressDuplicateScrapeTargetErrors
|
||||
Whether to suppress 'duplicate scrape target' errors; see https://docs.victoriametrics.com/vmagent.html#troubleshooting for details
|
||||
-promscrape.suppressScrapeErrors
|
||||
@@ -1354,7 +1424,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.aws.accessKey array
|
||||
Optional AWS AccessKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set
|
||||
@@ -1397,6 +1467,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.flushInterval duration
|
||||
Interval for flushing the data to remote storage. This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url (default 1s)
|
||||
-remoteWrite.forcePromProto array
|
||||
Whether to force Prometheus remote write protocol for sending data to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.forceVMProto array
|
||||
Whether to force VictoriaMetrics remote write protocol for sending data to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.headers array
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1405,12 +1481,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.maxBlockSize size
|
||||
The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 8388608)
|
||||
-remoteWrite.maxDailySeries int
|
||||
The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
|
||||
-remoteWrite.maxDiskUsagePerURL array
|
||||
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500MB. Disk usage is unlimited if the value is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB.
|
||||
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. Disk usage is unlimited if the value is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.maxHourlySeries int
|
||||
The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
|
||||
@@ -1440,12 +1516,14 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-remoteWrite.queues int
|
||||
The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues isn't enough for sending high volume of collected data to remote storage. Default value is 2 * numberOfAvailableCPUs (default 8)
|
||||
-remoteWrite.rateLimit array
|
||||
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
|
||||
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.relabelConfig string
|
||||
Optional path to file with relabel_config entries. The path can point either to local file or to http url. These entries are applied to all the metrics before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details
|
||||
Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
|
||||
-remoteWrite.keepDanglingQueues
|
||||
Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.
|
||||
-remoteWrite.roundDigits array
|
||||
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
|
||||
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.sendTimeout array
|
||||
Timeout for sending a single block of data to the corresponding -remoteWrite.url
|
||||
@@ -1455,8 +1533,17 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-remoteWrite.significantFigures array
|
||||
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.config array
|
||||
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.dedupInterval array
|
||||
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.keepInput array
|
||||
Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. By default, the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. See https://docs.victoriametrics.com/stream-aggregation.html
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default system CA is used
|
||||
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default, system CA is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to the corresponding -remoteWrite.url
|
||||
@@ -1468,16 +1555,18 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsServerName array
|
||||
Optional TLS server name to use for connections to the corresponding -remoteWrite.url. By default the server name from -remoteWrite.url is used
|
||||
Optional TLS server name to use for connections to the corresponding -remoteWrite.url. By default, the server name from -remoteWrite.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tmpDataPath string
|
||||
Path to directory where temporary data for remote write component is stored. See also -remoteWrite.maxDiskUsagePerURL (default "vmagent-remotewrite-data")
|
||||
-remoteWrite.url array
|
||||
Remote storage URL to write data to. It must support Prometheus remote_write API. It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL
|
||||
Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. See also -remoteWrite.multitenantURL
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.urlRelabelConfig array
|
||||
Optional path to relabel config for the corresponding -remoteWrite.url. The path can point either to local file or to http url
|
||||
Optional path to relabel configs for the corresponding -remoteWrite.url. See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.vmProtoCompressLevel int
|
||||
The compression level for VictoriaMetrics remote write protocol. Higher values reduce network traffic at the cost of higher CPU usage. Negative values reduce CPU usage at the cost of increased network traffic. See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-tls
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -26,10 +26,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
return stream.Parse(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -28,11 +28,9 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -20,9 +20,7 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
return stream.Parse(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -15,14 +15,14 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field")
|
||||
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
|
||||
dbLabel = flag.String("influxDBLabel", "db", "Default label for the DB name sent over '?db={db_name}' query parameter")
|
||||
)
|
||||
@@ -37,10 +37,8 @@ var (
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -52,15 +50,13 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return stream.Parse(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -38,24 +38,37 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . "+
|
||||
"See also -influxListenAddr.useProxyProtocol")
|
||||
influxUseProxyProtocol = flag.Bool("influxListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -influxListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. "+
|
||||
"See also -graphiteListenAddr.useProxyProtocol")
|
||||
graphiteUseProxyProtocol = flag.Bool("graphiteListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -graphiteListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
|
||||
"Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol")
|
||||
opentsdbUseProxyProtocol = flag.Bool("opentsdbListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
|
||||
)
|
||||
|
||||
@@ -86,17 +99,20 @@ func main() {
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("-promscrape.config is ok; exitting with 0 status code")
|
||||
logger.Infof("-promscrape.config is ok; exiting with 0 status code")
|
||||
return
|
||||
}
|
||||
if *dryRun {
|
||||
if err := remotewrite.CheckRelabelConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking relabel configs: %s", err)
|
||||
}
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking -promscrape.config: %s", err)
|
||||
}
|
||||
logger.Infof("all the configs are ok; exitting with 0 status code")
|
||||
if err := remotewrite.CheckRelabelConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking relabel configs: %s", err)
|
||||
}
|
||||
if err := remotewrite.CheckStreamAggrConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking -remoteWrite.streamAggr.config: %s", err)
|
||||
}
|
||||
logger.Infof("all the configs are ok; exiting with 0 status code")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -104,28 +120,27 @@ func main() {
|
||||
startTime := time.Now()
|
||||
remotewrite.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
writeconcurrencylimiter.Init()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
|
||||
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, *graphiteUseProxyProtocol, graphite.InsertHandler)
|
||||
}
|
||||
if len(*opentsdbListenAddr) > 0 {
|
||||
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
|
||||
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, httpInsertHandler)
|
||||
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, *opentsdbUseProxyProtocol, opentsdb.InsertHandler, httpInsertHandler)
|
||||
}
|
||||
if len(*opentsdbHTTPListenAddr) > 0 {
|
||||
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, httpInsertHandler)
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.Push)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
}
|
||||
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
@@ -197,7 +212,7 @@ func getAuthTokenFromPath(path string) (*auth.Token, error) {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
@@ -225,7 +240,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
statusCode := http.StatusNoContent
|
||||
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus/metrics/job/") ||
|
||||
strings.HasPrefix(path, "/api/v1/import/prometheus/metrics/job/") {
|
||||
// Return 200 status code for pushgateway requests.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
|
||||
statusCode = http.StatusOK
|
||||
}
|
||||
w.WriteHeader(statusCode)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "datadog/") {
|
||||
@@ -235,6 +257,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
switch path {
|
||||
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||
if common.HandleVMProtoServerHandshake(w, r) {
|
||||
return true
|
||||
}
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(nil, r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
@@ -349,12 +374,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/prometheus/config", "/config":
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigRequests.Inc()
|
||||
@@ -363,12 +383,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeStatusConfigRequests.Inc()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -10,9 +10,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,14 +30,12 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
return err
|
||||
}
|
||||
isGzip := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
return stream.Parse(req.Body, isGzip, func(block *stream.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, block *parser.Block, extraLabels []prompbmarshal.Label) error {
|
||||
func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -20,9 +20,7 @@ var (
|
||||
//
|
||||
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
return stream.Parse(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -25,10 +25,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
return stream.Parse(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,20 +31,11 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader with optional gzip format
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
}, nil)
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return stream.Parse(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, func(s string) {
|
||||
httpserver.LogError(req, s)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
60
app/vmagent/prometheusimport/request_handler_test.go
Normal file
60
app/vmagent/prometheusimport/request_handler_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
)
|
||||
|
||||
var (
|
||||
srv *httptest.Server
|
||||
testOutput *bytes.Buffer
|
||||
)
|
||||
|
||||
func TestInsertHandler(t *testing.T) {
|
||||
setUp()
|
||||
defer tearDown()
|
||||
req := httptest.NewRequest(http.MethodPost, "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
|
||||
go_memstats_alloc_bytes_total 1`))
|
||||
if err := InsertHandler(nil, req); err != nil {
|
||||
t.Errorf("unxepected error %s", err)
|
||||
}
|
||||
expectedMsg := "cannot unmarshal Prometheus line"
|
||||
if !strings.Contains(testOutput.String(), expectedMsg) {
|
||||
t.Errorf("output %q should contain %q", testOutput.String(), expectedMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func setUp() {
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(204)
|
||||
}))
|
||||
flag.Parse()
|
||||
remoteWriteFlag := "remoteWrite.url"
|
||||
if err := flag.Lookup(remoteWriteFlag).Value.Set(srv.URL); err != nil {
|
||||
log.Fatalf("unable to set %q with value %q, err: %v", remoteWriteFlag, srv.URL, err)
|
||||
}
|
||||
logger.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
remotewrite.Init()
|
||||
testOutput = &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
}
|
||||
|
||||
func tearDown() {
|
||||
common.StopUnmarshalWorkers()
|
||||
srv.Close()
|
||||
logger.ResetOutputForTest()
|
||||
tmpDataDir := flag.Lookup("remoteWrite.tmpDataPath").Value.String()
|
||||
fs.MustRemoveAll(tmpDataDir)
|
||||
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
package promremotewrite
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
@@ -11,9 +10,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -29,19 +27,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, nil)
|
||||
})
|
||||
isVMRemoteWrite := req.Header.Get("Content-Encoding") == "zstd"
|
||||
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -15,13 +15,19 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
forcePromProto = flagutil.NewArrayBool("remoteWrite.forcePromProto", "Whether to force Prometheus remote write protocol for sending data "+
|
||||
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol")
|
||||
forceVMProto = flagutil.NewArrayBool("remoteWrite.forceVMProto", "Whether to force VictoriaMetrics remote write protocol for sending data "+
|
||||
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol")
|
||||
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
|
||||
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
@@ -32,9 +38,9 @@ var (
|
||||
"to the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
|
||||
@@ -69,8 +75,12 @@ var (
|
||||
type client struct {
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
// Whether to use VictoriaMetrics remote write protocol for sending the data to remoteWriteURL
|
||||
useVMProto bool
|
||||
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
@@ -122,19 +132,39 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(pu)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
awsCfg: awsCfg,
|
||||
fq: fq,
|
||||
hc: &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
hc: hc,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
|
||||
useVMProto := forceVMProto.GetOptionalArg(argIdx)
|
||||
usePromProto := forcePromProto.GetOptionalArg(argIdx)
|
||||
if useVMProto && usePromProto {
|
||||
logger.Fatalf("-remoteWrite.useVMProto and -remoteWrite.usePromProto cannot be set simultaneously for -remoteWrite.url=%s", sanitizedURL)
|
||||
}
|
||||
if !useVMProto && !usePromProto {
|
||||
// Auto-detect whether the remote storage supports VictoriaMetrics remote write protocol.
|
||||
doRequest := func(url string) (*http.Response, error) {
|
||||
return c.doRequest(url, nil)
|
||||
}
|
||||
useVMProto = common.HandleVMProtoClientHandshake(c.remoteWriteURL, doRequest)
|
||||
if !useVMProto {
|
||||
logger.Infof("the remote storage at %q doesn't support VictoriaMetrics remote write protocol. Switching to Prometheus remote write protocol. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol", sanitizedURL)
|
||||
}
|
||||
}
|
||||
c.useVMProto = useVMProto
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -291,38 +321,45 @@ func (c *client) runWorker() {
|
||||
}
|
||||
}
|
||||
|
||||
// sendBlockHTTP returns false only if c.stopCh is closed.
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
retriesCount := 0
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
sigv4Hash := ""
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash = awsapi.HashHex(block)
|
||||
}
|
||||
|
||||
again:
|
||||
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
reqBody := bytes.NewBuffer(body)
|
||||
req, err := http.NewRequest(http.MethodPost, url, reqBody)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.sanitizedURL, err)
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
|
||||
}
|
||||
c.authCfg.SetHeaders(req, true)
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
if c.useVMProto {
|
||||
h.Set("Content-Encoding", "zstd")
|
||||
h.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
|
||||
} else {
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
}
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash := awsapi.HashHex(body)
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
|
||||
}
|
||||
}
|
||||
return c.hc.Do(req)
|
||||
}
|
||||
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
//
|
||||
// The function returns false only if c.stopCh is closed.
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
retryDuration := time.Second
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
startTime := time.Now()
|
||||
resp, err := c.hc.Do(req)
|
||||
resp, err := c.doRequest(c.remoteWriteURL, block)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
@@ -347,6 +384,8 @@ again:
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
return true
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -23,6 +24,9 @@ var (
|
||||
"This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url")
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock")
|
||||
maxRowsPerBlock = flag.Int("remoteWrite.maxRowsPerBlock", 10000, "The maximum number of samples to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
|
||||
vmProtoCompressLevel = flag.Int("remoteWrite.vmProtoCompressLevel", 0, "The compression level for VictoriaMetrics remote write protocol. "+
|
||||
"Higher values reduce network traffic at the cost of higher CPU usage. Negative values reduce CPU usage at the cost of increased network traffic. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol")
|
||||
)
|
||||
|
||||
type pendingSeries struct {
|
||||
@@ -33,9 +37,10 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
@@ -88,6 +93,9 @@ type writeRequest struct {
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
@@ -104,7 +112,7 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
@@ -126,7 +134,7 @@ func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
@@ -188,16 +196,20 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte)) {
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
}
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= maxUnpackedBlockSize.N {
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := snappyBufPool.Get()
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
if isVMRemoteWrite {
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, *vmProtoCompressLevel)
|
||||
} else {
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
pushBlock(zb.B)
|
||||
@@ -221,18 +233,18 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
}
|
||||
n := len(samples) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries[n:]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries
|
||||
}
|
||||
|
||||
|
||||
@@ -2,40 +2,48 @@ package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestPushWriteRequest(t *testing.T) {
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
rowsCounts := []int{1, 10, 100, 1e3, 1e4}
|
||||
expectedBlockLensProm := []int{216, 1848, 16424, 169882, 1757876}
|
||||
expectedBlockLensVM := []int{138, 492, 3927, 34995, 288476}
|
||||
for i, rowsCount := range rowsCounts {
|
||||
expectedBlockLenProm := expectedBlockLensProm[i]
|
||||
expectedBlockLenVM := expectedBlockLensVM[i]
|
||||
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
|
||||
testPushWriteRequest(t, rowsCount)
|
||||
testPushWriteRequest(t, rowsCount, expectedBlockLenProm, expectedBlockLenVM)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testPushWriteRequest(t *testing.T, rowsCount int) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expectedBlockLenVM int) {
|
||||
f := func(isVMRemoteWrite bool, expectedBlockLen int, tolerancePrc float64) {
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
b := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
zb := snappy.Encode(nil, b)
|
||||
maxPushBlockLen := len(zb)
|
||||
minPushBlockLen := maxPushBlockLen / 2
|
||||
if pushBlockLen < minPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be at least %d bytes", pushBlockLen, minPushBlockLen)
|
||||
}
|
||||
if pushBlockLen > maxPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be smaller or equal to %d bytes", pushBlockLen, maxPushBlockLen)
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 0)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
}
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompbmarshal.WriteRequest {
|
||||
|
||||
@@ -15,11 +15,13 @@ import (
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. "+
|
||||
"The path can point either to local file or to http url. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url. "+
|
||||
"The path can point either to local file or to http url")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
|
||||
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
|
||||
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
|
||||
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
|
||||
@@ -86,7 +88,7 @@ func initLabelsGlobal() {
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
|
||||
49
app/vmagent/remotewrite/relabel_test.go
Normal file
49
app/vmagent/remotewrite/relabel_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestApplyRelabeling(t *testing.T) {
|
||||
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
|
||||
rctx := &relabelCtx{}
|
||||
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
|
||||
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
|
||||
if !reflect.DeepEqual(gotTss, expTss) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, nil, "up", "up")
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
- action: labeldrop
|
||||
regex: "env.*"
|
||||
`))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
|
||||
|
||||
oldVal := *usePromCompatibleNaming
|
||||
*usePromCompatibleNaming = true
|
||||
f(nil, nil, `foo.bar`, `foo_bar`)
|
||||
*usePromCompatibleNaming = oldVal
|
||||
}
|
||||
|
||||
func parseSeries(data string) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, prompbmarshal.TimeSeries{
|
||||
Labels: promutils.MustNewLabelsFromString(data).GetLabels(),
|
||||
})
|
||||
return tss
|
||||
}
|
||||
@@ -4,51 +4,57 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
|
||||
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL")
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
|
||||
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. See also -remoteWrite.multitenantURL")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. Default value is 2 * numberOfAvailableCPUs")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500MB. "+
|
||||
"Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
significantFigures = flagutil.NewArrayInt("remoteWrite.significantFigures", "The number of significant figures to leave in metric values before writing them "+
|
||||
"to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. "+
|
||||
"This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits")
|
||||
roundDigits = flagutil.NewArrayInt("remoteWrite.roundDigits", "Round metric values to this number of decimal digits after the point before writing them to remote storage. "+
|
||||
"Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+
|
||||
"By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
|
||||
"By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
|
||||
"This option may be used for improving data compression for the stored metrics")
|
||||
sortLabels = flag.Bool("sortLabels", false, `Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. `+
|
||||
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
|
||||
@@ -58,6 +64,15 @@ var (
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. "+
|
||||
"By default, the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", "Input samples are de-duplicated with this interval before being aggregated. "+
|
||||
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -84,6 +99,8 @@ var allRelabelConfigs atomic.Value
|
||||
// since it may lead to high memory usage due to big number of buffers.
|
||||
var maxQueues = cgroup.AvailableCPUs() * 16
|
||||
|
||||
const persistentQueueDirname = "persistent-queue"
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging.
|
||||
func InitSecretFlags() {
|
||||
if !*showRemoteWriteURL {
|
||||
@@ -140,8 +157,8 @@ func Init() {
|
||||
logger.Fatalf("cannot load relabel configs: %s", err)
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
relabelConfigSuccess.Set(1)
|
||||
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
if len(*remoteWriteURLs) > 0 {
|
||||
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
|
||||
@@ -154,34 +171,56 @@ func Init() {
|
||||
for {
|
||||
select {
|
||||
case <-sighupCh:
|
||||
case <-stopCh:
|
||||
case <-configReloaderStopCh:
|
||||
return
|
||||
}
|
||||
configReloads.Inc()
|
||||
logger.Infof("SIGHUP received; reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
rcs, err := loadRelabelConfigs()
|
||||
if err != nil {
|
||||
configReloadErrors.Inc()
|
||||
configSuccess.Set(0)
|
||||
logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
allRelabelConfigs.Store(rcs)
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
logger.Infof("Successfully reloaded relabel configs")
|
||||
reloadRelabelConfigs()
|
||||
reloadStreamAggrConfigs()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
relabelConfigReloads.Inc()
|
||||
logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
rcs, err := loadRelabelConfigs()
|
||||
if err != nil {
|
||||
relabelConfigReloadErrors.Inc()
|
||||
relabelConfigSuccess.Set(0)
|
||||
logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
|
||||
return
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
relabelConfigSuccess.Set(1)
|
||||
relabelConfigTimestamp.Set(fasttime.UnixTimestamp())
|
||||
logger.Infof("successfully reloaded relabel configs")
|
||||
}
|
||||
|
||||
var (
|
||||
configReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
configTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
relabelConfigReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
relabelConfigReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
relabelConfigSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
relabelConfigTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
func reloadStreamAggrConfigs() {
|
||||
if len(*remoteWriteMultitenantURLs) > 0 {
|
||||
rwctxsMapLock.Lock()
|
||||
for _, rwctxs := range rwctxsMap {
|
||||
reinitStreamAggr(rwctxs)
|
||||
}
|
||||
rwctxsMapLock.Unlock()
|
||||
} else {
|
||||
reinitStreamAggr(rwctxsDefault)
|
||||
}
|
||||
}
|
||||
|
||||
func reinitStreamAggr(rwctxs []*remoteWriteCtx) {
|
||||
for _, rwctx := range rwctxs {
|
||||
rwctx.reinitStreamAggr()
|
||||
}
|
||||
}
|
||||
|
||||
func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
@@ -214,17 +253,44 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, at, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
}
|
||||
|
||||
if !*keepDanglingQueues {
|
||||
// Remove dangling queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
existingQueues := make(map[string]struct{}, len(rwctxs))
|
||||
for _, rwctx := range rwctxs {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxs))
|
||||
}
|
||||
}
|
||||
|
||||
return rwctxs
|
||||
}
|
||||
|
||||
var stopCh = make(chan struct{})
|
||||
var configReloaderStopCh = make(chan struct{})
|
||||
var configReloaderWG sync.WaitGroup
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls Push during and after the call to this func.
|
||||
func Stop() {
|
||||
close(stopCh)
|
||||
close(configReloaderStopCh)
|
||||
configReloaderWG.Wait()
|
||||
|
||||
for _, rwctx := range rwctxsDefault {
|
||||
@@ -435,9 +501,13 @@ var (
|
||||
)
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
sas atomic.Pointer[streamaggr.Aggregators]
|
||||
streamAggrKeepInput bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
@@ -451,8 +521,13 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
pqURL.RawQuery = ""
|
||||
pqURL.Fragment = ""
|
||||
h := xxhash.Sum64([]byte(pqURL.String()))
|
||||
queuePath := fmt.Sprintf("%s/persistent-queue/%d_%016X", *tmpDataPath, argIdx+1, h)
|
||||
queuePath := filepath.Join(*tmpDataPath, persistentQueueDirname, fmt.Sprintf("%d_%016X", argIdx+1, h))
|
||||
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArgOrDefault(argIdx, 0)
|
||||
if maxPendingBytes != 0 && maxPendingBytes < persistentqueue.DefaultChunkFileSize {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4195
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
@@ -460,6 +535,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
@@ -469,6 +545,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pssLen := *queues
|
||||
@@ -479,9 +556,10 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
|
||||
}
|
||||
return &remoteWriteCtx{
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
@@ -490,6 +568,22 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
}
|
||||
|
||||
// Initialize sas
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(argIdx, 0)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
|
||||
}
|
||||
rwctx.sas.Store(sas)
|
||||
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(argIdx)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, sasFile)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) MustStop() {
|
||||
@@ -501,6 +595,10 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
|
||||
sas := rwctx.sas.Swap(nil)
|
||||
sas.MustStop()
|
||||
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
|
||||
@@ -509,6 +607,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
@@ -526,11 +625,18 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
|
||||
}
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
rowsCount := getRowsCount(tss)
|
||||
rwctx.rowsPushedAfterRelabel.Add(rowsCount)
|
||||
pss[idx].Push(tss)
|
||||
|
||||
// Apply stream aggregation if any
|
||||
sas := rwctx.sas.Load()
|
||||
sas.Push(tss)
|
||||
if sas == nil || rwctx.streamAggrKeepInput {
|
||||
// Push samples to the remote storage
|
||||
rwctx.pushInternal(tss)
|
||||
}
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
@@ -538,6 +644,42 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) reinitStreamAggr() {
|
||||
sas := rwctx.sas.Load()
|
||||
if sas == nil {
|
||||
// There is no stream aggregation for rwctx
|
||||
return
|
||||
}
|
||||
|
||||
sasFile := streamAggrConfig.GetOptionalArg(rwctx.idx)
|
||||
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(rwctx.idx, 0)
|
||||
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
if err != nil {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
|
||||
logger.Errorf("cannot reload stream aggregation config from -remoteWrite.streamAggr.config=%q; continue using the previously loaded config; error: %s", sasFile, err)
|
||||
return
|
||||
}
|
||||
if !sasNew.Equal(sas) {
|
||||
sasOld := rwctx.sas.Swap(sasNew)
|
||||
sasOld.MustStop()
|
||||
logger.Infof("successfully reloaded stream aggregation configs at -remoteWrite.streamAggr.config=%q", sasFile)
|
||||
} else {
|
||||
sasNew.MustStop()
|
||||
logger.Infof("the config at -remoteWrite.streamAggr.config=%q wasn't changed", sasFile)
|
||||
}
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(1)
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_success_timestamp_seconds{path=%q}`, sasFile)).Set(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
@@ -552,3 +694,20 @@ func getRowsCount(tss []prompbmarshal.TimeSeries) int {
|
||||
}
|
||||
return rowsCount
|
||||
}
|
||||
|
||||
// CheckStreamAggrConfigs checks configs pointed by -remoteWrite.streamAggr.config
|
||||
func CheckStreamAggrConfigs() error {
|
||||
pushNoop := func(tss []prompbmarshal.TimeSeries) {}
|
||||
for idx, sasFile := range *streamAggrConfig {
|
||||
if sasFile == "" {
|
||||
continue
|
||||
}
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(idx, 0)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, pushNoop, dedupInterval)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot load -remoteWrite.streamAggr.config=%q: %w", sasFile, err)
|
||||
}
|
||||
sas.MustStop()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package vmimport
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
@@ -12,8 +11,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,20 +30,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
})
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return stream.Parse(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -114,6 +114,9 @@ vmalert-linux-arm64:
|
||||
vmalert-linux-ppc64le:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-linux-s390x:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vmalert-linux-386:
|
||||
APP_NAME=vmalert CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
|
||||
@@ -9,6 +9,13 @@ protocol and require `-remoteWrite.url` to be configured.
|
||||
Vmalert is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
|
||||
implementation and aims to be compatible with its syntax.
|
||||
|
||||
A [single-node](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmalert)
|
||||
or [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmalert)
|
||||
of VictoriaMetrics are capable of proxying requests to vmalert via `-vmalert.proxyURL` command-line flag.
|
||||
Use this feature for the following cases:
|
||||
* for proxying requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* for accessing vmalerts UI through VictoriaMetrics Web interface.
|
||||
|
||||
## Features
|
||||
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
|
||||
@@ -16,12 +23,14 @@ implementation and aims to be compatible with its syntax.
|
||||
support and expressions validation;
|
||||
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager) starting from [Alertmanager v0.16.0-aplha](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0-alpha.0);
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager) starting from [Alertmanager v0.16.0-alpha](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0-alpha.0);
|
||||
* Keeps the alerts [state on restarts](#alerts-state-on-restarts);
|
||||
* Graphite datasource can be used for alerting and recording rules. See [these docs](#graphite);
|
||||
* Recording and Alerting rules backfilling (aka `replay`). See [these docs](#rules-backfilling);
|
||||
* Lightweight and without extra dependencies.
|
||||
* Supports [reusable templates](#reusable-templates) for annotations.
|
||||
* Supports [reusable templates](#reusable-templates) for annotations;
|
||||
* Load of recording and alerting rules from local filesystem, URL, GCS and S3;
|
||||
* Detect alerting rules which [don't match any series](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
||||
|
||||
## Limitations
|
||||
|
||||
@@ -69,16 +78,17 @@ Then configure `vmalert` accordingly:
|
||||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
Note there's a separate `remoteWrite.url` to allow writing results of
|
||||
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
|
||||
alerting/recording rules into a different storage than the initial data that's
|
||||
queried. This allows using `vmalert` to aggregate data from a short-term,
|
||||
high-frequency, high-cardinality storage into a long-term storage with
|
||||
decreased cardinality and a bigger interval between samples.
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
@@ -135,6 +145,15 @@ params:
|
||||
headers:
|
||||
[ <string>, ...]
|
||||
|
||||
# Optional list of HTTP headers in form `header-name: value`
|
||||
# applied for all alert notifications sent to notifiers
|
||||
# generated by rules of this group.
|
||||
# For example:
|
||||
# notifier_headers:
|
||||
# - "TenantID: foo"
|
||||
notifier_headers:
|
||||
[ <string>, ...]
|
||||
|
||||
# Optional list of labels added to every rule within a group.
|
||||
# It has priority over the external labels.
|
||||
# Labels are commonly used for adding environment
|
||||
@@ -191,6 +210,11 @@ expr: <string>
|
||||
# Is applicable to alerting rules only.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
|
||||
# Labels to add or overwrite for each alert.
|
||||
labels:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
@@ -213,7 +237,8 @@ The following variables are available in templating:
|
||||
| $labels or .Labels | The list of labels of the current alert. Use as ".Labels.<label_name>". | {% raw %}Too high number of connections for {{ .Labels.instance }}{% endraw %} |
|
||||
| $alertID or .AlertID | The current alert's ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
|
||||
| $groupID or .GroupID | The current alert's group ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
|
||||
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr|queryEscape }}{% endraw %} |
|
||||
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr|queryEscape }}{% endraw %} |
|
||||
| $for or .For | Alert's configured for param. | {% raw %}Number of connections is too high for more than {{ .For }}{% endraw %} |
|
||||
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | {% raw %}Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }}){% endraw %} |
|
||||
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | {% raw %}Visit {{ $externalURL }} for more details{% endraw %} |
|
||||
|
||||
@@ -246,7 +271,7 @@ Additionally, `vmalert` provides some extra templating functions listed [here](#
|
||||
query at `-datasource.url` and returns the first result.
|
||||
- `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
- `quotesEscape` - escapes the input string, so it can be safely embedded into JSON string.
|
||||
- `reReplaceAll regex repl` - replaces all the occurences of the `regex` in input string with the `repl`.
|
||||
- `reReplaceAll regex repl` - replaces all the occurrences of the `regex` in input string with the `repl`.
|
||||
- `safeHtml` - marks the input string as safe to use in HTML context without the need to html-escape it.
|
||||
- `sortByLabel name` - sorts the input query results by the label with the given `name`.
|
||||
- `stripDomain` - leaves the first part of the domain. For example, `foo.bar.baz` is converted to `foo`.
|
||||
@@ -318,6 +343,12 @@ expr: <string>
|
||||
# Labels to add or overwrite before storing the result.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
@@ -384,6 +415,25 @@ The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz`
|
||||
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) and in `*-enterprise`
|
||||
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags).
|
||||
|
||||
### Reading rules from object storage
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/enterprise.html) of `vmalert` may read alerting and recording rules
|
||||
from object storage:
|
||||
|
||||
- `./bin/vmalert -rule=s3://bucket/dir/alert.rules` would read rules from the given path at S3 bucket
|
||||
- `./bin/vmalert -rule=gs://bucket/bir/alert.rules` would read rules from the given path at GCS bucket
|
||||
|
||||
S3 and GCS paths support only matching by prefix, e.g. `s3://bucket/dir/rule_` matches
|
||||
all files with prefix `rule_` in the folder `dir`.
|
||||
|
||||
The following [command-line flags](#flags) can be used for fine-tuning access to S3 and GCS:
|
||||
|
||||
- `-s3.credsFilePath` - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
- `-s3.configFilePath` - path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
- `-s3.configProfile` - profile name for S3 configs. If no set, the value of the environment variable will be loaded (`AWS_PROFILE` or `AWS_DEFAULT_PROFILE`).
|
||||
- `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
- `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
|
||||
### Topology examples
|
||||
|
||||
The following sections are showing how `vmalert` may be used and configured
|
||||
@@ -433,7 +483,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
|
||||
<img alt="vmalert cluster" src="vmalert_cluster.png">
|
||||
|
||||
In case when you want to spread the load on these components - add balancers before them and configure
|
||||
`vmalert` with balancer's addresses. Please, see more about VM's cluster architecture
|
||||
`vmalert` with balancer addresses. Please, see more about VM's cluster architecture
|
||||
[here](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#architecture-overview).
|
||||
|
||||
#### HA vmalert
|
||||
@@ -458,7 +508,7 @@ Alertmanagers.
|
||||
|
||||
To avoid recording rules results and alerts state duplication in VictoriaMetrics server
|
||||
don't forget to configure [deduplication](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#deduplication).
|
||||
The recommended value for `-dedup.minScrapeInterval` must be greater or equal to vmalert's `evaluation_interval`.
|
||||
The recommended value for `-dedup.minScrapeInterval` must be greater or equal to vmalert `evaluation_interval`.
|
||||
If you observe inconsistent or "jumping" values in series produced by vmalert, try disabling `-datasource.queryTimeAlignment`
|
||||
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
|
||||
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
|
||||
@@ -468,7 +518,8 @@ Alertmanager will automatically deduplicate alerts with identical labels, so ens
|
||||
all `vmalert`s are having the same config.
|
||||
|
||||
Don't forget to configure [cluster mode](https://prometheus.io/docs/alerting/latest/alertmanager/)
|
||||
for Alertmanagers for better reliability.
|
||||
for Alertmanagers for better reliability. List all Alertmanager URLs in vmalert `-notifier.url`
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
|
||||
This example uses single-node VM server for the sake of simplicity.
|
||||
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
|
||||
@@ -501,8 +552,8 @@ groups:
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
|
||||
reading data from one data source and backfilling results to another. This helps to build a system
|
||||
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
|
||||
allows reading data from one data source and backfilling results to another. This helps to build a system
|
||||
for aggregating and downsampling the data.
|
||||
|
||||
The following example shows how to build a topology where `vmalert` will process data from one cluster
|
||||
@@ -526,7 +577,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
|
||||
|
||||
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
|
||||
|
||||
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
|
||||
@@ -559,7 +610,7 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
|
||||
|
||||
`vmalert` web UI can be accessed from [single-node version of VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
|
||||
and from [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
This may be used for better integraion with Grafana unified alerting system. See the following docs for details:
|
||||
This may be used for better integration with Grafana unified alerting system. See the following docs for details:
|
||||
|
||||
* [How to query vmalert from single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmalert)
|
||||
* [How to query vmalert from VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmalert)
|
||||
@@ -580,6 +631,12 @@ can read the same rules configuration as normal, evaluate them on the given time
|
||||
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
|
||||
data source for backfilling.
|
||||
|
||||
Please note, that response caching may lead to unexpected results during and after backfilling process.
|
||||
In order to avoid this you need to reset cache contents or disable caching when using backfilling
|
||||
as described in [backfilling docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#backfilling).
|
||||
|
||||
See a blogpost about [Rules backfilling via vmalert](https://victoriametrics.com/blog/rules-replay/).
|
||||
|
||||
### How it works
|
||||
|
||||
In `replay` mode vmalert works as a cli-tool and exits immediately after work is done.
|
||||
@@ -677,38 +734,49 @@ The default list of alerting rules for these metric can be found [here](https://
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
|
||||
metrics may be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert/) for `vmalert` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
vmalert executes configured rules within certain intervals. It is expected that at the moment when rule is executed,
|
||||
the data is already present in configured `-datasource.url`:
|
||||
### Data delay
|
||||
|
||||
Data delay is one of the most common issues with rules execution.
|
||||
vmalert executes configured rules within certain intervals at specifics timestamps.
|
||||
It expects that the data is already present in configured `-datasource.url` at the moment of time when rule is executed:
|
||||
|
||||
<img alt="vmalert expected evaluation" src="vmalert_ts_normal.gif">
|
||||
|
||||
Usually, troubles start to appear when data in `-datasource.url` is delayed or absent. In such cases, evaluations
|
||||
may get empty response from datasource and produce empty recording rules or reset alerts state:
|
||||
may get empty response from the datasource and produce empty recording rules or reset alerts state:
|
||||
|
||||
<img alt="vmalert evaluation when data is delayed" src="vmalert_ts_data_delay.gif">
|
||||
|
||||
_By default recently written samples to VictoriaMetrics aren't visible for queries for up to 30s.
|
||||
This behavior is controlled by `-search.latencyOffset` command-line flag on vmselect. Usually, this results into
|
||||
a 30s shift for recording rules results.
|
||||
Note that too small value passed to `-search.latencyOffset` may lead to incomplete query results._
|
||||
Try the following recommendations to reduce the chance of hitting the data delay issue:
|
||||
|
||||
Try the following recommendations in such cases:
|
||||
* Always configure group's `evaluationInterval` to be bigger or at least equal to
|
||||
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution);
|
||||
* Ensure that `[duration]` value is at least twice bigger than
|
||||
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution). For example,
|
||||
if expression is `rate(my_metric[2m]) > 0` then ensure that `my_metric` resolution is at least `1m` or better `30s`.
|
||||
If you use VictoriaMetrics as datasource, `[duration]` can be omitted and VictoriaMetrics will adjust it automatically.
|
||||
* If you know in advance, that data in datasource is delayed - try changing vmalerts `-datasource.lookback`
|
||||
command-line flag to add a time shift for evaluations. Or extend `[duration]` to tolerate the delay.
|
||||
For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
|
||||
* If [time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution)
|
||||
in datasource is inconsistent or `>=5min` - try changing vmalerts `-datasource.queryStep` command-line flag to specify
|
||||
how far search query can lookback for the recent datapoint. The recommendation is to have the step
|
||||
at least two times bigger than the resolution.
|
||||
|
||||
* Always configure group's `evaluationInterval` to be bigger or equal to `scrape_interval` at which metrics
|
||||
are delivered to the datasource;
|
||||
* If you know in advance, that data in datasource is delayed - try changing vmalert's `-datasource.lookback`
|
||||
command-line flag to add a time shift for evaluations;
|
||||
* If time intervals between datapoints in datasource are irregular or `>=5min` - try changing vmalert's
|
||||
`-datasource.queryStep` command-line flag to specify how far search query can lookback for the recent datapoint.
|
||||
The recommendation is to have the step at least two times bigger than `scrape_interval`, since
|
||||
there are no guarantees that scrape will not fail.
|
||||
> Please note, data delay is inevitable in distributed systems. And it is better to account for it instead of ignoring.
|
||||
|
||||
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s
|
||||
(see `-search.latencyOffset` command-line flag at vmselect). Such delay is needed to eliminate risk of incomplete
|
||||
data on the moment of querying, since metrics collectors won't be able to deliver the data in time.
|
||||
|
||||
### Alerts state
|
||||
|
||||
Sometimes, it is not clear why some specific alert fired or didn't fire. It is very important to remember, that
|
||||
alerts with `for: 0` fire immediately when their expression becomes true. And alerts with `for > 0` will fire only
|
||||
@@ -720,8 +788,9 @@ If `-remoteWrite.url` command-line flag is configured, vmalert will persist aler
|
||||
[vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state
|
||||
changed in time.
|
||||
|
||||
vmalert also stores last N state updates for each rule. To check updates, click on `Details` link next to rule's name
|
||||
on `/vmalert/groups` page and check the `Last updates` section:
|
||||
vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules))
|
||||
state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page
|
||||
and check the `Last updates` section:
|
||||
|
||||
<img alt="vmalert state" src="vmalert_state.png">
|
||||
|
||||
@@ -730,7 +799,9 @@ HTTP request sent by vmalert to the `-datasource.url` during evaluation. If spec
|
||||
no samples returned and curl command returns data - then it is very likely there was no data in datasource on the
|
||||
moment when rule was evaluated.
|
||||
|
||||
vmalert also alows configuring more detailed logging for specific rule. Just set `debug: true` in rule's configuration
|
||||
### Debug mode
|
||||
|
||||
vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration
|
||||
and vmalert will start printing additional log messages:
|
||||
```terminal
|
||||
2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms)
|
||||
@@ -742,6 +813,22 @@ and vmalert will start printing additional log messages:
|
||||
2022-09-15T13:36:56.153Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:36:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} PENDING => FIRING: 1m0s since becoming active at 2022-09-15 15:35:56.126006 +0200 CEST m=+39.384575417
|
||||
```
|
||||
|
||||
### Never-firing alerts
|
||||
|
||||
vmalert can detect if alert's expression doesn't match any time series in runtime. This problem usually happens
|
||||
when alerting expression selects time series which aren't present in the datasource (i.e. wrong `job` label)
|
||||
or there is a typo in the series selector (i.e. `env=rpod`). Such alerting rules will be marked with special icon in
|
||||
vmalerts UI and exposed via `vmalert_alerting_rules_last_evaluation_series_fetched` metric. The metric value will
|
||||
show how many time series were matched before the filtering by rule's expression. If metric value is `-1`, then
|
||||
this feature is not supported by the datasource (old versions of VictoriaMetrics). The following expression can be
|
||||
used to detect rules matching no series:
|
||||
```
|
||||
max(vmalert_alerting_rules_last_evaluation_series_fetched) by(group, alertname) == 0
|
||||
```
|
||||
|
||||
See more details [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
||||
This feature is available only if vmalert is using VictoriaMetrics v1.90 or higher as a datasource.
|
||||
|
||||
|
||||
## Profiling
|
||||
|
||||
@@ -785,7 +872,7 @@ The shortlist of configuration flags is the following:
|
||||
-clusterMode
|
||||
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-configCheckInterval duration
|
||||
Interval for checking for changes in '-rule' or '-notifier.config' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes.
|
||||
Interval for checking for changes in '-rule' or '-notifier.config' files. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.
|
||||
-datasource.appendTypePrefix
|
||||
Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.
|
||||
-datasource.basicAuth.password string
|
||||
@@ -819,7 +906,7 @@ The shortlist of configuration flags is the following:
|
||||
-datasource.queryStep duration
|
||||
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
|
||||
-datasource.queryTimeAlignment
|
||||
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite of number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
-datasource.roundDigits int
|
||||
Adds "round_digits" GET param to datasource requests. In VM "round_digits" limits the number of digits after the decimal point in response values.
|
||||
-datasource.showURL
|
||||
@@ -845,7 +932,7 @@ The shortlist of configuration flags is the following:
|
||||
-dryRun
|
||||
Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
@@ -855,7 +942,7 @@ The shortlist of configuration flags is the following:
|
||||
-evaluationInterval duration
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
-external.alert.source string
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service. Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . For example, link to Grafana: -external.alert.source='explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr":{{$expr|jsonEscape|queryEscape}} },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]' . If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service. Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . For example, link to Grafana: -external.alert.source='explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{$expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"now-1h","to":"now"}}'. Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.
|
||||
-external.label array
|
||||
Optional label in the form 'Name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -864,11 +951,11 @@ The shortlist of configuration flags is the following:
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default compression is enabled to save network bandwidth
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -882,13 +969,21 @@ The shortlist of configuration flags is the following:
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
Address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8880")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -897,9 +992,11 @@ The shortlist of configuration flags is the following:
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
@@ -939,7 +1036,7 @@ The shortlist of configuration flags is the following:
|
||||
-notifier.suppressDuplicateTargetErrors
|
||||
Whether to suppress 'duplicate target' errors during discovery
|
||||
-notifier.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default, system CA is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url
|
||||
@@ -951,10 +1048,10 @@ The shortlist of configuration flags is the following:
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsServerName array
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
|
||||
Optional TLS server name to use for connections to -notifier.url. By default, the server name from -notifier.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-notifier.url array
|
||||
Prometheus alertmanager URL, e.g. http://127.0.0.1:9093
|
||||
Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
@@ -974,7 +1071,7 @@ The shortlist of configuration flags is the following:
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteRead.basicAuth.password string
|
||||
Optional basic auth password for -remoteRead.url
|
||||
@@ -991,7 +1088,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteRead.headers string
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteRead.url. For example, -remoteRead.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteRead.url. Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'
|
||||
-remoteRead.ignoreRestoreErrors
|
||||
Whether to ignore errors from remote storage when restoring alerts state on startup. (default true)
|
||||
Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases. (default true)
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.oauth2.clientID string
|
||||
@@ -1007,7 +1104,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteRead.showURL
|
||||
Whether to show -remoteRead.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteRead.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default, system CA is used
|
||||
-remoteRead.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsInsecureSkipVerify
|
||||
@@ -1015,7 +1112,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteRead.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default, the server name from -remoteRead.url is used
|
||||
-remoteRead.url vmalert
|
||||
Optional URL to datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect.Remote read is used to restore alerts state.This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428. See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.
|
||||
-remoteWrite.basicAuth.password string
|
||||
@@ -1037,7 +1134,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteWrite.headers string
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
-remoteWrite.maxBatchSize int
|
||||
Defines defines max number of timeseries to be flushed at once (default 1000)
|
||||
Defines max number of timeseries to be flushed at once (default 1000)
|
||||
-remoteWrite.maxQueueSize int
|
||||
Defines the max number of pending datapoints to remote write endpoint (default 100000)
|
||||
-remoteWrite.oauth2.clientID string
|
||||
@@ -1055,7 +1152,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default, system CA is used
|
||||
-remoteWrite.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsInsecureSkipVerify
|
||||
@@ -1063,34 +1160,42 @@ The shortlist of configuration flags is the following:
|
||||
-remoteWrite.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default, the server name from -remoteWrite.url is used
|
||||
-remoteWrite.url string
|
||||
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.
|
||||
-replay.disableProgressBar
|
||||
Whether to disable rendering progress bars during the replay. Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.
|
||||
-replay.maxDatapointsPerQuery int
|
||||
Max number of data points expected in one request. The higher the value, the less requests will be made during replay. (default 1000)
|
||||
-replay.maxDatapointsPerQuery /query_range
|
||||
Max number of data points expected in one request. It affects the max time range for every /query_range request during the replay. The higher the value, the less requests will be made during replay. (default 1000)
|
||||
-replay.ruleRetryAttempts int
|
||||
Defines how many retries to make before giving up on rule if request for it returns an error. (default 5)
|
||||
-replay.rulesDelay duration
|
||||
Delay between rules evaluation within the group. Could be important if there are chained rules inside of the groupand processing need to wait for previous rule results to be persisted by remote storage before evaluating the next rule.Keep it equal or bigger than -remoteWrite.flushInterval. (default 1s)
|
||||
Delay between rules evaluation within the group. Could be important if there are chained rules inside the group and processing need to wait for previous rule results to be persisted by remote storage before evaluating the next rule.Keep it equal or bigger than -remoteWrite.flushInterval. (default 1s)
|
||||
-replay.timeFrom string
|
||||
The time filter in RFC3339 format to select time series with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
-replay.timeTo string
|
||||
The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
-rule array
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Path to the files with alerting and/or recording rules.
|
||||
Supports hierarchical patterns and regexpes.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
-rule="/path/to/file". Path to a single file with alerting rules.
|
||||
-rule="http://<some-server-addr>/path/to/rules". HTTP URL to a page with alerting rules.
|
||||
-rule="dir/*.yaml" -rule="/*.yaml" -rule="gcs://vmalert-rules/tenant_%{TENANT_ID}/prod".
|
||||
-rule="dir/**/*.yaml". Includes all the .yaml files in "dir" subfolders recursively.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
|
||||
Enterprise version of vmalert supports S3 and GCS paths to rules.
|
||||
For example: gs://bucket/path/to/rules, s3://bucket/path/to/rules
|
||||
S3 and GCS paths support only matching by prefix, e.g. s3://bucket/dir/rule_ matches
|
||||
all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.configCheckInterval duration
|
||||
Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead
|
||||
Interval for checking for changes in '-rule' files. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead
|
||||
-rule.maxResolveDuration duration
|
||||
Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group.
|
||||
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group.
|
||||
-rule.resendDelay duration
|
||||
Minimum amount of time to wait before resending an alert to notifier
|
||||
-rule.templates array
|
||||
@@ -1100,11 +1205,26 @@ The shortlist of configuration flags is the following:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
absolute path to all .tpl files in root.
|
||||
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.updateEntriesLimit int
|
||||
Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param. (default 20)
|
||||
-rule.validateExpressions
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
-rule.validateTemplates
|
||||
Whether to validate annotation and label templates (default true)
|
||||
-s3.configFilePath string
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.configProfile string
|
||||
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.customEndpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.forcePathStyle
|
||||
Prefixing endpoint with bucket name when set false, true by default. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default true)
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
@@ -1184,6 +1304,8 @@ dns_sd_configs:
|
||||
```
|
||||
|
||||
The list of configured or discovered Notifiers can be explored via [UI](#Web).
|
||||
If Alertmanager runs in cluster mode then all its URLs needs to be available during discovery
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
|
||||
The configuration file [specification](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/config.go)
|
||||
is the following:
|
||||
|
||||
@@ -47,10 +47,11 @@ type AlertingRule struct {
|
||||
}
|
||||
|
||||
type alertingRuleMetrics struct {
|
||||
errors *utils.Gauge
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
errors *utils.Gauge
|
||||
pending *utils.Gauge
|
||||
active *utils.Gauge
|
||||
samples *utils.Gauge
|
||||
seriesFetched *utils.Gauge
|
||||
}
|
||||
|
||||
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
||||
@@ -74,10 +75,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
Debug: cfg.Debug,
|
||||
}),
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
@@ -116,6 +122,21 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
e := ar.state.getLast()
|
||||
return float64(e.samples)
|
||||
})
|
||||
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.seriesFetched == nil {
|
||||
// means seriesFetched is unsupported
|
||||
return -1
|
||||
}
|
||||
seriesFetched := float64(*e.seriesFetched)
|
||||
if seriesFetched == 0 && e.samples > 0 {
|
||||
// `alert: 0.95` will fetch no series
|
||||
// but will get one time series in response.
|
||||
seriesFetched = float64(e.samples)
|
||||
}
|
||||
return seriesFetched
|
||||
})
|
||||
return ar
|
||||
}
|
||||
|
||||
@@ -125,6 +146,7 @@ func (ar *AlertingRule) Close() {
|
||||
ar.metrics.pending.Unregister()
|
||||
ar.metrics.errors.Unregister()
|
||||
ar.metrics.samples.Unregister()
|
||||
ar.metrics.seriesFetched.Unregister()
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
@@ -229,7 +251,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
// to get time series for backfilling.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
series, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -237,7 +259,7 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
|
||||
}
|
||||
for _, s := range series {
|
||||
for _, s := range res.Data {
|
||||
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
@@ -277,14 +299,15 @@ const resolvedRetention = 15 * time.Minute
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
qMetrics, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(qMetrics),
|
||||
err: err,
|
||||
curl: requestToCurl(req),
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(res.Data),
|
||||
seriesFetched: res.SeriesFetched,
|
||||
err: err,
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
@@ -310,11 +333,11 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
return res.Data, err
|
||||
}
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
for _, m := range res.Data {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
@@ -416,7 +439,9 @@ func (ar *AlertingRule) UpdateWith(r Rule) error {
|
||||
ar.Labels = nr.Labels
|
||||
ar.Annotations = nr.Annotations
|
||||
ar.EvalInterval = nr.EvalInterval
|
||||
ar.Debug = nr.Debug
|
||||
ar.q = nr.q
|
||||
ar.state = nr.state
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -456,6 +481,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
Value: m.Values[0],
|
||||
ActiveAt: start,
|
||||
Expr: ar.Expr,
|
||||
For: ar.For,
|
||||
}
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
return a, err
|
||||
@@ -477,20 +503,23 @@ func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
||||
func (ar *AlertingRule) ToAPI() APIRule {
|
||||
lastState := ar.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "alerting",
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
Updates: ar.state.getAll(),
|
||||
Type: "alerting",
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
LastSeriesFetched: lastState.seriesFetched,
|
||||
MaxUpdates: ar.state.size(),
|
||||
Updates: ar.state.getAll(),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
@@ -597,54 +626,60 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
|
||||
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
||||
}
|
||||
|
||||
// Restore restores the state of active alerts basing on previously written time series.
|
||||
// Restore restores only ActiveAt field. Field State will be always Pending and supposed
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
// Only rules with For > 0 will be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("querier is nil")
|
||||
// Restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ts := time.Now()
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
ar.alertsMu.Lock()
|
||||
defer ar.alertsMu.Unlock()
|
||||
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
for k, v := range labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range qMetrics {
|
||||
ls := &labelSet{
|
||||
origin: make(map[string]string, len(m.Labels)),
|
||||
processed: make(map[string]string, len(m.Labels)),
|
||||
for _, a := range ar.alerts {
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
for _, l := range m.Labels {
|
||||
if l.Name == "__name__" {
|
||||
continue
|
||||
}
|
||||
ls.origin[l.Name] = l.Value
|
||||
ls.processed[l.Name] = l.Value
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, time.Unix(int64(m.Values[0]), 0), qFn)
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create alert: %w", err)
|
||||
return err
|
||||
}
|
||||
a.ID = hash(ls.processed)
|
||||
a.State = notifier.StatePending
|
||||
|
||||
qMetrics := res.Data
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.Restored = true
|
||||
ar.alerts[a.ID] = a
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -6,12 +6,15 @@ import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
||||
@@ -502,118 +505,156 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Restore(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
metrics []datasource.Metric
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestRuleWithLabels("no extra labels"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(nil): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("metric labels"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
alertNameLabel, "metric labels",
|
||||
alertGroupNameLabel, "groupID",
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "metric labels",
|
||||
alertGroupNameLabel: "groupID",
|
||||
"foo": "bar",
|
||||
"namespace": "baz",
|
||||
}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("rule labels", "source", "vm"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
// extra labels set by rule
|
||||
"source", "vm",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
"foo": "bar",
|
||||
"namespace": "baz",
|
||||
"source": "vm",
|
||||
}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multiple alerts"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-1",
|
||||
),
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(2*time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-2",
|
||||
),
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(3*time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-3",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{"host": "localhost-1"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
hash(map[string]string{"host": "localhost-2"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(2 * time.Hour)},
|
||||
hash(map[string]string{"host": "localhost-3"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(3 * time.Hour)},
|
||||
},
|
||||
},
|
||||
func TestGroup_Restore(t *testing.T) {
|
||||
defaultTS := time.Now()
|
||||
fqr := &fakeQuerierWithRegistry{}
|
||||
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
defer fqr.reset()
|
||||
|
||||
for _, r := range rules {
|
||||
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
}
|
||||
|
||||
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
|
||||
fg.start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
fg.close()
|
||||
wg.Wait()
|
||||
|
||||
gotAlerts := make(map[uint64]*notifier.Alert)
|
||||
for _, rs := range fg.Rules {
|
||||
alerts := rs.(*AlertingRule).alerts
|
||||
for k, v := range alerts {
|
||||
if !v.Restored {
|
||||
// set not restored alerts to predictable timestamp
|
||||
v.ActiveAt = defaultTS
|
||||
}
|
||||
gotAlerts[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if len(gotAlerts) != len(expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(expAlerts), len(gotAlerts))
|
||||
}
|
||||
for key, exp := range expAlerts {
|
||||
got, ok := gotAlerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != notifier.StatePending {
|
||||
t.Fatalf("expected state %d; got %d", notifier.StatePending, got.State)
|
||||
}
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
}
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
fq.add(tc.metrics...)
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour, nil); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
||||
}
|
||||
for key, exp := range tc.expAlerts {
|
||||
got, ok := tc.rule.alerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != exp.State {
|
||||
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
||||
}
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
}
|
||||
|
||||
stateMetric := func(name string, value time.Time, labels ...string) datasource.Metric {
|
||||
labels = append(labels, "__name__", alertForStateMetricName)
|
||||
labels = append(labels, alertNameLabel, name)
|
||||
labels = append(labels, alertGroupNameLabel, "TestRestore")
|
||||
return metricWithValueAndLabels(t, float64(value.Unix()), labels...)
|
||||
}
|
||||
|
||||
// one active alert, no previous state
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
fqr.reset()
|
||||
|
||||
// one active alert with state restore
|
||||
ts := time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, two with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// one active alert but wrong state restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
stateMetric("wrong alert", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with labels
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with restore labels missmatch
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
@@ -709,7 +750,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
@@ -749,7 +789,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||
@@ -789,7 +828,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1,
|
||||
@@ -820,6 +858,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
@@ -936,6 +975,6 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
For: waitFor,
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,16 +5,14 @@ import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/log"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
@@ -38,7 +36,8 @@ type Group struct {
|
||||
Params url.Values `yaml:"params"`
|
||||
// Headers contains optional HTTP headers added to each rule request
|
||||
Headers []Header `yaml:"headers,omitempty"`
|
||||
|
||||
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
|
||||
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
@@ -114,6 +113,9 @@ type Rule struct {
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
|
||||
// Overrides `-rule.updateEntriesLimit`.
|
||||
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
@@ -198,21 +200,45 @@ func (r *Rule) Validate() error {
|
||||
// ValidateTplFn must validate the given annotations
|
||||
type ValidateTplFn func(annotations map[string]string) error
|
||||
|
||||
// cLogger is a logger with support of logs suppressing.
|
||||
// it is used when logs emitted by config package needs
|
||||
// to be suppressed.
|
||||
var cLogger = &log.Logger{}
|
||||
|
||||
// ParseSilent parses rule configs from given file patterns without emitting logs
|
||||
func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
cLogger.Suppress(true)
|
||||
defer cLogger.Suppress(false)
|
||||
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
return parse(files, validateTplFn, validateExpressions)
|
||||
}
|
||||
|
||||
// Parse parses rule configs from given file patterns
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
var fp []string
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
groups, err := parse(files, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
}
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
errGroup := new(utils.ErrGroup)
|
||||
var groups []Group
|
||||
for _, file := range fp {
|
||||
for file, data := range files {
|
||||
uniqueGroups := map[string]struct{}{}
|
||||
gr, err := parseFile(file)
|
||||
gr, err := parseConfig(data)
|
||||
if err != nil {
|
||||
errGroup.Add(fmt.Errorf("failed to parse file %q: %w", file, err))
|
||||
continue
|
||||
@@ -234,20 +260,19 @@ func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressio
|
||||
if err := errGroup.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
logger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
}
|
||||
sort.SliceStable(groups, func(i, j int) bool {
|
||||
if groups[i].File != groups[j].File {
|
||||
return groups[i].File < groups[j].File
|
||||
}
|
||||
return groups[i].Name < groups[j].Name
|
||||
})
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parseFile(path string) ([]Group, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
func parseConfig(data []byte) ([]Group, error) {
|
||||
data, err := envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file %q: %w", path, err)
|
||||
}
|
||||
data, err = envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars in %q: %w", path, err)
|
||||
return nil, fmt.Errorf("cannot expand environment vars: %w", err)
|
||||
}
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
@@ -27,6 +29,40 @@ func TestParseGood(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFromURL(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/bad", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte("foo bar"))
|
||||
})
|
||||
mux.HandleFunc("/good-alert", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
groups:
|
||||
- name: TestGroup
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: vm_tcplistener_conns > 0`))
|
||||
})
|
||||
mux.HandleFunc("/good-rr", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Write([]byte(`
|
||||
groups:
|
||||
- name: TestGroup
|
||||
rules:
|
||||
- record: conns
|
||||
expr: max(vm_tcplistener_conns)`))
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
if _, err := Parse([]string{srv.URL + "/good-alert", srv.URL + "/good-rr"}, notifier.ValidateTemplates, true); err != nil {
|
||||
t.Errorf("error parsing URLs %s", err)
|
||||
}
|
||||
|
||||
if _, err := Parse([]string{srv.URL + "/bad"}, notifier.ValidateTemplates, true); err == nil {
|
||||
t.Errorf("expected parsing error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBad(t *testing.T) {
|
||||
testCases := []struct {
|
||||
path []string
|
||||
@@ -64,6 +100,10 @@ func TestParseBad(t *testing.T) {
|
||||
[]string{"testdata/dir/rules6-bad.rules"},
|
||||
"missing ':' in header",
|
||||
},
|
||||
{
|
||||
[]string{"http://unreachable-url"},
|
||||
"failed to read",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
_, err := Parse(tc.path, notifier.ValidateTemplates, true)
|
||||
@@ -102,7 +142,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "group name must be set",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
@@ -113,7 +154,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
@@ -125,7 +167,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
validateExpressions: true,
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
@@ -139,7 +182,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
@@ -156,7 +200,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
validateAnnotations: true,
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
@@ -171,7 +216,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
@@ -184,7 +230,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
@@ -197,7 +244,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "duplicate",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
@@ -210,7 +258,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test",
|
||||
group: &Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
@@ -223,7 +272,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test thanos",
|
||||
group: &Group{
|
||||
Name: "test thanos",
|
||||
Type: NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
@@ -235,7 +285,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "unknown datasource type",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test graphite",
|
||||
group: &Group{
|
||||
Name: "test graphite",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
@@ -247,7 +298,8 @@ func TestGroup_Validate(t *testing.T) {
|
||||
expErr: "",
|
||||
},
|
||||
{
|
||||
group: &Group{Name: "test prometheus",
|
||||
group: &Group{
|
||||
Name: "test prometheus",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
@@ -538,6 +590,24 @@ rules:
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("`notifier_headers` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
notifier_headers:
|
||||
- "TenantID: foo"
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
notifier_headers:
|
||||
- "TenantID: bar"
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("`debug` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
@@ -550,6 +620,20 @@ rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
debug: true
|
||||
`)
|
||||
})
|
||||
t.Run("`update_entries_limit` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
update_entries_limit: 33
|
||||
`)
|
||||
})
|
||||
}
|
||||
|
||||
111
app/vmalert/config/fs.go
Normal file
111
app/vmalert/config/fs.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fslocal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fsurl"
|
||||
)
|
||||
|
||||
// FS represent a file system abstract for reading files.
|
||||
type FS interface {
|
||||
// Init initializes FS.
|
||||
Init() error
|
||||
|
||||
// String must return human-readable representation of FS.
|
||||
String() string
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
List() ([]string, error)
|
||||
|
||||
// Read returns a list of read files in form of a map
|
||||
// where key is a file name and value is a content of read file.
|
||||
// Read must be called only after the successful Init call.
|
||||
Read(files []string) (map[string][]byte, error)
|
||||
}
|
||||
|
||||
var (
|
||||
fsRegistryMu sync.Mutex
|
||||
fsRegistry = make(map[string]FS)
|
||||
)
|
||||
|
||||
// readFromFS parses the given path list and inits FS for each item.
|
||||
// Once initialed, readFromFS will try to read and return files from each FS.
|
||||
// readFromFS returns an error if at least one FS failed to init.
|
||||
// The function can be called multiple times but each unique path
|
||||
// will be initialed only once.
|
||||
//
|
||||
// It is allowed to mix different FS types in path list.
|
||||
func readFromFS(paths []string) (map[string][]byte, error) {
|
||||
var err error
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range paths {
|
||||
fsRegistryMu.Lock()
|
||||
fs, ok := fsRegistry[path]
|
||||
if !ok {
|
||||
fs, err = newFS(path)
|
||||
if err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while parsing path %q: %w", path, err)
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while initializing path %q: %w", path, err)
|
||||
}
|
||||
fsRegistry[path] = fs
|
||||
}
|
||||
fsRegistryMu.Unlock()
|
||||
|
||||
list, err := fs.List()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list files from %q", fs)
|
||||
}
|
||||
|
||||
cLogger.Infof("found %d files to read from %q", len(list), fs)
|
||||
|
||||
if len(list) < 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
ts := time.Now()
|
||||
files, err := fs.Read(list)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading files from %q: %w", fs, err)
|
||||
}
|
||||
cLogger.Infof("finished reading %d files in %v from %q", len(list), time.Since(ts), fs)
|
||||
|
||||
for k, v := range files {
|
||||
if _, ok := result[k]; ok {
|
||||
return nil, fmt.Errorf("duplicate found for file name %q: file names must be unique", k)
|
||||
}
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// newFS creates FS based on the give path.
|
||||
// Supported file systems are: fs
|
||||
func newFS(originPath string) (FS, error) {
|
||||
scheme := "fs"
|
||||
path := originPath
|
||||
n := strings.Index(path, "://")
|
||||
if n >= 0 {
|
||||
scheme = path[:n]
|
||||
path = path[n+len("://"):]
|
||||
}
|
||||
if len(path) == 0 {
|
||||
return nil, fmt.Errorf("path cannot be empty")
|
||||
}
|
||||
switch scheme {
|
||||
case "fs":
|
||||
return &fslocal.FS{Pattern: path}, nil
|
||||
case "http", "https":
|
||||
return &fsurl.FS{Path: originPath}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported scheme %q", scheme)
|
||||
}
|
||||
}
|
||||
39
app/vmalert/config/fs_test.go
Normal file
39
app/vmalert/config/fs_test.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewFS(t *testing.T) {
|
||||
f := func(path, expStr string) {
|
||||
t.Helper()
|
||||
fs, err := newFS(path)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if fs.String() != expStr {
|
||||
t.Fatalf("expected FS %q; got %q", expStr, fs.String())
|
||||
}
|
||||
}
|
||||
|
||||
f("/foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
f("fs:///foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
}
|
||||
|
||||
func TestNewFSNegative(t *testing.T) {
|
||||
f := func(path, expErr string) {
|
||||
t.Helper()
|
||||
_, err := newFS(path)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to have err: %s", expErr)
|
||||
}
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to have err %q; got %q instead", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "path cannot be empty")
|
||||
f("fs://", "path cannot be empty")
|
||||
f("foobar://baz", `unsupported scheme "foobar"`)
|
||||
}
|
||||
50
app/vmalert/config/fslocal/fslocal.go
Normal file
50
app/vmalert/config/fslocal/fslocal.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package fslocal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
)
|
||||
|
||||
// FS represents a local file system
|
||||
type FS struct {
|
||||
// Pattern is used for matching one or multiple files.
|
||||
// The pattern may describe hierarchical names such as
|
||||
// /usr/*/bin/ed (assuming the Separator is '/').
|
||||
Pattern string
|
||||
}
|
||||
|
||||
// Init verifies that configured Pattern is correct
|
||||
func (fs *FS) Init() error {
|
||||
_, err := doublestar.FilepathGlob(fs.Pattern)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (fs *FS) String() string {
|
||||
return fmt.Sprintf("Local FS{MatchPattern: %q}", fs.Pattern)
|
||||
}
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
func (fs *FS) List() ([]string, error) {
|
||||
matches, err := doublestar.FilepathGlob(fs.Pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while matching files via pattern %s: %w", fs.Pattern, err)
|
||||
}
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
// Read returns a map of read files where
|
||||
// key is the file name and value is file's content.
|
||||
func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range files {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading file %q: %w", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
57
app/vmalert/config/fsurl/url.go
Normal file
57
app/vmalert/config/fsurl/url.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package fsurl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// FS represents a struct which can read content from URL Path
|
||||
type FS struct {
|
||||
// Path defines the URL to read the data from
|
||||
Path string
|
||||
}
|
||||
|
||||
// Init verifies that configured Path is correct
|
||||
func (fs *FS) Init() error {
|
||||
_, err := url.Parse(fs.Path)
|
||||
return err
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (fs *FS) String() string {
|
||||
return fmt.Sprintf("URL {Path: %q}", fs.Path)
|
||||
}
|
||||
|
||||
// List returns the list of file names which will be read via Read fn
|
||||
// List isn't supported by FS and reads from Path only
|
||||
func (fs *FS) List() ([]string, error) {
|
||||
return []string{fs.Path}, nil
|
||||
}
|
||||
|
||||
// Read returns a map of read files where
|
||||
// key is the file name and value is file's content.
|
||||
func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range files {
|
||||
resp, err := http.Get(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from %q: %w", path, err)
|
||||
}
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
if len(data) > 4*1024 {
|
||||
data = data[:4*1024]
|
||||
}
|
||||
return nil, fmt.Errorf("unexpected status code when fetching %q: %d, expecting %d; response: %q",
|
||||
path, resp.StatusCode, http.StatusOK, data)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
59
app/vmalert/config/log/logger.go
Normal file
59
app/vmalert/config/log/logger.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// Logger is using lib/logger for logging
|
||||
// but can be suppressed via Suppress method
|
||||
type Logger struct {
|
||||
mu sync.RWMutex
|
||||
disabled bool
|
||||
}
|
||||
|
||||
// Suppress whether to ignore message logging.
|
||||
// Once suppressed, logging continues to be ignored
|
||||
// until logger is un-suppressed.
|
||||
func (l *Logger) Suppress(v bool) {
|
||||
l.mu.Lock()
|
||||
l.disabled = v
|
||||
l.mu.Unlock()
|
||||
}
|
||||
|
||||
func (l *Logger) isDisabled() bool {
|
||||
l.mu.RLock()
|
||||
defer l.mu.RUnlock()
|
||||
return l.disabled
|
||||
}
|
||||
|
||||
// Errorf logs error message.
|
||||
func (l *Logger) Errorf(format string, args ...interface{}) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Errorf(format, args...)
|
||||
}
|
||||
|
||||
// Warnf logs warning message.
|
||||
func (l *Logger) Warnf(format string, args ...interface{}) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Warnf(format, args...)
|
||||
}
|
||||
|
||||
// Infof logs info message.
|
||||
func (l *Logger) Infof(format string, args ...interface{}) {
|
||||
if l.isDisabled() {
|
||||
return
|
||||
}
|
||||
logger.Infof(format, args...)
|
||||
}
|
||||
|
||||
// Panicf logs panic message and panics.
|
||||
// Panicf can't be suppressed
|
||||
func (l *Logger) Panicf(format string, args ...interface{}) {
|
||||
logger.Panicf(format, args...)
|
||||
}
|
||||
54
app/vmalert/config/log/logger_test.go
Normal file
54
app/vmalert/config/log/logger_test.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
func TestOutput(t *testing.T) {
|
||||
testOutput := &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
defer logger.ResetOutputForTest()
|
||||
|
||||
log := &Logger{}
|
||||
|
||||
mustMatch := func(exp string) {
|
||||
t.Helper()
|
||||
if exp == "" {
|
||||
if testOutput.String() != "" {
|
||||
t.Errorf("expected output to be empty; got %q", testOutput.String())
|
||||
return
|
||||
}
|
||||
}
|
||||
if !strings.Contains(testOutput.String(), exp) {
|
||||
t.Errorf("output %q should contain %q", testOutput.String(), exp)
|
||||
}
|
||||
fmt.Println(testOutput.String())
|
||||
testOutput.Reset()
|
||||
}
|
||||
|
||||
log.Warnf("foo")
|
||||
mustMatch("foo")
|
||||
|
||||
log.Infof("info %d", 2)
|
||||
mustMatch("info 2")
|
||||
|
||||
log.Errorf("error %s %d", "baz", 5)
|
||||
mustMatch("error baz 5")
|
||||
|
||||
log.Suppress(true)
|
||||
|
||||
log.Warnf("foo")
|
||||
mustMatch("")
|
||||
|
||||
log.Infof("info %d", 2)
|
||||
mustMatch("")
|
||||
|
||||
log.Errorf("error %q %d", "baz", 5)
|
||||
mustMatch("")
|
||||
|
||||
}
|
||||
@@ -5,6 +5,8 @@ groups:
|
||||
limit: 1000
|
||||
headers:
|
||||
- "MyHeader: foo"
|
||||
notifier_headers:
|
||||
- "MyHeader: foo"
|
||||
params:
|
||||
denyPartialResponse: ["true"]
|
||||
rules:
|
||||
@@ -12,6 +14,7 @@ groups:
|
||||
expr: vm_tcplistener_conns > 0
|
||||
for: 3m
|
||||
debug: true
|
||||
update_entries_limit: 40
|
||||
annotations:
|
||||
labels: "Available labels: {{ $labels }}"
|
||||
summary: Too high connection number for {{ $labels.instance }}
|
||||
@@ -20,6 +23,7 @@ groups:
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
update_entries_limit: -1
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
labels:
|
||||
|
||||
@@ -7,6 +7,7 @@ groups:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
|
||||
@@ -13,11 +13,22 @@ type Querier interface {
|
||||
// It returns list of Metric in response, the http.Request used for sending query
|
||||
// and error if any. Returned http.Request can't be reused and its body is already read.
|
||||
// Query should stop once ctx is cancelled.
|
||||
Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error)
|
||||
Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error)
|
||||
// QueryRange executes range request with the given query on the given time range.
|
||||
// It returns list of Metric in response and error if any.
|
||||
// QueryRange should stop once ctx is cancelled.
|
||||
QueryRange(ctx context.Context, query string, from, to time.Time) ([]Metric, error)
|
||||
QueryRange(ctx context.Context, query string, from, to time.Time) (Result, error)
|
||||
}
|
||||
|
||||
// Result represents expected response from the datasource
|
||||
type Result struct {
|
||||
// Data contains list of received Metric
|
||||
Data []Metric
|
||||
// SeriesFetched contains amount of time series processed by datasource
|
||||
// during query evaluation.
|
||||
// If nil, then this feature is not supported by the datasource.
|
||||
// SeriesFetched is supported by VictoriaMetrics since v1.90.
|
||||
SeriesFetched *int
|
||||
}
|
||||
|
||||
// QuerierBuilder builds Querier with given params.
|
||||
@@ -72,6 +83,15 @@ func (m *Metric) AddLabel(key, value string) {
|
||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
}
|
||||
|
||||
// DelLabel deletes the given label from the label set
|
||||
func (m *Metric) DelLabel(key string) {
|
||||
for i, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
m.Labels = append(m.Labels[:i], m.Labels[i+1:]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Label returns the given label value.
|
||||
// If label is missing empty string will be returned
|
||||
func (m *Metric) Label(key string) string {
|
||||
|
||||
@@ -47,7 +47,7 @@ var (
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
|
||||
"Alignment supposed to produce deterministic results despite of number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
|
||||
`If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.`)
|
||||
|
||||
@@ -2,6 +2,7 @@ package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -98,10 +99,10 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati
|
||||
}
|
||||
|
||||
// Query executes the given query and returns parsed response
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error) {
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
|
||||
req, err := s.newRequestPOST()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return Result{}, nil, err
|
||||
}
|
||||
|
||||
switch s.dataSourceType {
|
||||
@@ -110,12 +111,12 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
|
||||
case datasourceGraphite:
|
||||
s.setGraphiteReqParams(req, query, ts)
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
|
||||
return Result{}, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
|
||||
}
|
||||
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
return nil, req, err
|
||||
return Result{}, req, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
@@ -132,24 +133,24 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
|
||||
// QueryRange executes the given query on the given time range.
|
||||
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
|
||||
// Graphite type isn't supported.
|
||||
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) ([]Metric, error) {
|
||||
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
return nil, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
|
||||
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
|
||||
}
|
||||
req, err := s.newRequestPOST()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return res, err
|
||||
}
|
||||
if start.IsZero() {
|
||||
return nil, fmt.Errorf("start param is missing")
|
||||
return res, fmt.Errorf("start param is missing")
|
||||
}
|
||||
if end.IsZero() {
|
||||
return nil, fmt.Errorf("end param is missing")
|
||||
return res, fmt.Errorf("end param is missing")
|
||||
}
|
||||
s.setPrometheusRangeReqParams(req, query, start, end)
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return res, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
@@ -162,6 +163,11 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, req.URL.RawQuery)
|
||||
}
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
resp, err = s.c.Do(req.WithContext(ctx))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
@@ -174,7 +180,7 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
}
|
||||
|
||||
func (s *VMStorage) newRequestPOST() (*http.Request, error) {
|
||||
req, err := http.NewRequest("POST", s.datasourceURL, nil)
|
||||
req, err := http.NewRequest(http.MethodPost, s.datasourceURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -35,12 +35,12 @@ func (r graphiteResponse) metrics() []Metric {
|
||||
return ms
|
||||
}
|
||||
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
|
||||
r := &graphiteResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
return r.metrics(), nil
|
||||
return Result{Data: r.metrics()}, nil
|
||||
}
|
||||
|
||||
const (
|
||||
|
||||
@@ -22,6 +22,10 @@ type promResponse struct {
|
||||
ResultType string `json:"resultType"`
|
||||
Result json.RawMessage `json:"result"`
|
||||
} `json:"data"`
|
||||
// Stats supported by VictoriaMetrics since v1.90
|
||||
Stats struct {
|
||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||
} `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type promInstant struct {
|
||||
@@ -96,39 +100,54 @@ const (
|
||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||
)
|
||||
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||
r := &promResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
|
||||
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return res, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return nil, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
}
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
var pi promInstant
|
||||
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
|
||||
return nil, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
|
||||
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
return pi.metrics()
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return nil, err
|
||||
return res, err
|
||||
}
|
||||
return pr.metrics()
|
||||
parseFn = pr.metrics
|
||||
case rScalar:
|
||||
var ps promScalar
|
||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||
return nil, err
|
||||
return res, err
|
||||
}
|
||||
return ps.metrics()
|
||||
parseFn = ps.metrics
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||
}
|
||||
|
||||
ms, err := parseFn()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res = Result{Data: ms}
|
||||
if r.Stats.SeriesFetched != nil {
|
||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
||||
}
|
||||
res.SeriesFetched = &intV
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||
|
||||
@@ -35,13 +35,6 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 8:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
@@ -62,22 +55,28 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
case 1:
|
||||
w.WriteHeader(500)
|
||||
case 2:
|
||||
case 1:
|
||||
w.Write([]byte("[]"))
|
||||
case 3:
|
||||
case 2:
|
||||
w.Write([]byte(`{"status":"error", "errorType":"type:", "error":"some error msg"}`))
|
||||
case 4:
|
||||
case 3:
|
||||
w.Write([]byte(`{"status":"unknown"}`))
|
||||
case 5:
|
||||
case 4:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||
case 6:
|
||||
case 5:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
case 7:
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 7:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 8:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -95,24 +94,27 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
ts := time.Now()
|
||||
|
||||
expErr := func(err string) {
|
||||
if _, _, err := pq.Query(ctx, query, ts); err == nil {
|
||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
}
|
||||
if !strings.Contains(gotErr.Error(), err) {
|
||||
t.Fatalf("expected err %q; got %q", err, gotErr)
|
||||
}
|
||||
}
|
||||
|
||||
expErr("connection error") // 0
|
||||
expErr("invalid response status error") // 1
|
||||
expErr("response body error") // 2
|
||||
expErr("error status") // 3
|
||||
expErr("unknown status") // 4
|
||||
expErr("non-vector resultType error") // 5
|
||||
expErr("500") // 0
|
||||
expErr("error parsing prometheus metrics") // 1
|
||||
expErr("response error") // 2
|
||||
expErr("unknown status") // 3
|
||||
expErr("unexpected end of JSON input") // 4
|
||||
|
||||
m, _, err := pq.Query(ctx, query, ts) // 6 - vector
|
||||
res, _, err := pq.Query(ctx, query, ts) // 5 - vector
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 2 {
|
||||
t.Fatalf("expected 2 metrics got %d in %+v", len(m), m)
|
||||
if len(res.Data) != 2 {
|
||||
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
expected := []Metric{
|
||||
{
|
||||
@@ -126,17 +128,17 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
Values: []float64{2000},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, m, expected)
|
||||
metricsEqual(t, res.Data, expected)
|
||||
|
||||
m, req, err := pq.Query(ctx, query, ts) // 7 - scalar
|
||||
res, req, err := pq.Query(ctx, query, ts) // 6 - scalar
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if req == nil {
|
||||
t.Fatalf("expected request to be non-nil")
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
expected = []Metric{
|
||||
{
|
||||
@@ -144,18 +146,44 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
Values: []float64{1},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(m, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m, expected)
|
||||
if !reflect.DeepEqual(res.Data, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
|
||||
}
|
||||
|
||||
if res.SeriesFetched != nil {
|
||||
t.Fatalf("expected `seriesFetched` field to be nil when it is missing in datasource response; got %v instead",
|
||||
res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, query, ts) // 7 - scalar with stats
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res)
|
||||
}
|
||||
expected = []Metric{
|
||||
{
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{1},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(res.Data, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
|
||||
}
|
||||
if *res.SeriesFetched != 42 {
|
||||
t.Fatalf("expected `seriesFetched` field to be 42; got %d instead",
|
||||
*res.SeriesFetched)
|
||||
}
|
||||
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
m, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
if len(res.Data) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(res.Data), res.Data)
|
||||
}
|
||||
exp := []Metric{
|
||||
{
|
||||
@@ -164,7 +192,77 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
Values: []float64{10},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, m, exp)
|
||||
metricsEqual(t, res.Data, exp)
|
||||
|
||||
}
|
||||
|
||||
func TestVMInstantQueryWithRetry(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.URL.Query().Get("query") != query {
|
||||
t.Errorf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 1:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
case 2:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "2"]}}`))
|
||||
case 3:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
case 4:
|
||||
conn, _, _ := w.(http.Hijacker).Hijack()
|
||||
_ = conn.Close()
|
||||
}
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
s := NewVMStorage(srv.URL, nil, time.Minute, 0, false, srv.Client())
|
||||
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus)})
|
||||
|
||||
expErr := func(err string) {
|
||||
_, _, gotErr := pq.Query(ctx, query, time.Now())
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
}
|
||||
if !strings.Contains(gotErr.Error(), err) {
|
||||
t.Fatalf("expected err %q; got %q", err, gotErr)
|
||||
}
|
||||
}
|
||||
|
||||
expValue := func(v float64) {
|
||||
res, _, err := pq.Query(ctx, query, time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := []Metric{
|
||||
{
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{v},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(m, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m, expected)
|
||||
}
|
||||
}
|
||||
|
||||
expValue(1) // 0
|
||||
expValue(2) // 1 - fail, 2 - retry
|
||||
expErr("EOF") // 3, 4 - retries
|
||||
}
|
||||
|
||||
func metricsEqual(t *testing.T, gotM, expectedM []Metric) {
|
||||
@@ -250,10 +348,11 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
|
||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||
|
||||
m, err := pq.QueryRange(ctx, query, start, end)
|
||||
res, err := pq.QueryRange(ctx, query, start, end)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/url"
|
||||
@@ -35,15 +36,19 @@ type Group struct {
|
||||
Checksum string
|
||||
LastEvaluation time.Time
|
||||
|
||||
Labels map[string]string
|
||||
Params url.Values
|
||||
Headers map[string]string
|
||||
Labels map[string]string
|
||||
Params url.Values
|
||||
Headers map[string]string
|
||||
NotifierHeaders map[string]string
|
||||
|
||||
doneCh chan struct{}
|
||||
finishedCh chan struct{}
|
||||
// channel accepts new Group obj
|
||||
// which supposed to update current group
|
||||
updateCh chan *Group
|
||||
// evalCancel stores the cancel fn for interrupting
|
||||
// rules evaluation. Used on groups update() and close().
|
||||
evalCancel context.CancelFunc
|
||||
|
||||
metrics *groupMetrics
|
||||
}
|
||||
@@ -89,16 +94,17 @@ func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[s
|
||||
|
||||
func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
g := &Group{
|
||||
Type: cfg.Type,
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval.Duration(),
|
||||
Limit: cfg.Limit,
|
||||
Concurrency: cfg.Concurrency,
|
||||
Checksum: cfg.Checksum,
|
||||
Params: cfg.Params,
|
||||
Headers: make(map[string]string),
|
||||
Labels: cfg.Labels,
|
||||
Type: cfg.Type,
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval.Duration(),
|
||||
Limit: cfg.Limit,
|
||||
Concurrency: cfg.Concurrency,
|
||||
Checksum: cfg.Checksum,
|
||||
Params: cfg.Params,
|
||||
Headers: make(map[string]string),
|
||||
NotifierHeaders: make(map[string]string),
|
||||
Labels: cfg.Labels,
|
||||
|
||||
doneCh: make(chan struct{}),
|
||||
finishedCh: make(chan struct{}),
|
||||
@@ -113,6 +119,9 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
for _, h := range cfg.Headers {
|
||||
g.Headers[h.Key] = h.Value
|
||||
}
|
||||
for _, h := range cfg.NotifierHeaders {
|
||||
g.NotifierHeaders[h.Key] = h.Value
|
||||
}
|
||||
g.metrics = newGroupMetrics(g)
|
||||
rules := make([]Rule, len(cfg.Rules))
|
||||
for i, r := range cfg.Rules {
|
||||
@@ -158,23 +167,23 @@ func (g *Group) ID() uint64 {
|
||||
}
|
||||
|
||||
// Restore restores alerts state for group rules
|
||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, lookback time.Duration, labels map[string]string) error {
|
||||
labels = mergeLabels(g.Name, "", labels, g.Labels)
|
||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
rr, ok := rule.(*AlertingRule)
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if rr.For < 1 {
|
||||
if ar.For < 1 {
|
||||
continue
|
||||
}
|
||||
// ignore QueryParams on purpose, because they could contain
|
||||
// query filters. This may affect the restore procedure.
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: g.Type.String(),
|
||||
Headers: g.Headers,
|
||||
DataSourceType: g.Type.String(),
|
||||
EvaluationInterval: g.Interval,
|
||||
QueryParams: g.Params,
|
||||
Headers: g.Headers,
|
||||
Debug: ar.Debug,
|
||||
})
|
||||
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
|
||||
if err := ar.Restore(ctx, q, ts, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
}
|
||||
@@ -226,6 +235,7 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
g.Concurrency = newGroup.Concurrency
|
||||
g.Params = newGroup.Params
|
||||
g.Headers = newGroup.Headers
|
||||
g.NotifierHeaders = newGroup.NotifierHeaders
|
||||
g.Labels = newGroup.Labels
|
||||
g.Limit = newGroup.Limit
|
||||
g.Checksum = newGroup.Checksum
|
||||
@@ -233,11 +243,24 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// interruptEval interrupts in-flight rules evaluations
|
||||
// within the group. It is expected that g.evalCancel
|
||||
// will be repopulated after the call.
|
||||
func (g *Group) interruptEval() {
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
|
||||
if g.evalCancel != nil {
|
||||
g.evalCancel()
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Group) close() {
|
||||
if g.doneCh == nil {
|
||||
return
|
||||
}
|
||||
close(g.doneCh)
|
||||
g.interruptEval()
|
||||
<-g.finishedCh
|
||||
|
||||
g.metrics.iterationDuration.Unregister()
|
||||
@@ -251,14 +274,9 @@ func (g *Group) close() {
|
||||
|
||||
var skipRandSleepOnGroupStart bool
|
||||
|
||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client) {
|
||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
|
||||
e := &executor{
|
||||
rw: rw,
|
||||
notifiers: nts,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label)}
|
||||
|
||||
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
|
||||
@@ -279,11 +297,18 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
}
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
rw: rw,
|
||||
notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
evalTS := time.Now()
|
||||
|
||||
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
|
||||
eval := func(ts time.Time) {
|
||||
eval := func(ctx context.Context, ts time.Time) {
|
||||
g.metrics.iterationTotal.Inc()
|
||||
|
||||
start := time.Now()
|
||||
@@ -305,10 +330,26 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
g.LastEvaluation = start
|
||||
}
|
||||
|
||||
eval(evalTS)
|
||||
evalCtx, cancel := context.WithCancel(ctx)
|
||||
g.mu.Lock()
|
||||
g.evalCancel = cancel
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -319,6 +360,14 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
return
|
||||
case ng := <-g.updateCh:
|
||||
g.mu.Lock()
|
||||
|
||||
// it is expected that g.evalCancel will be evoked
|
||||
// somewhere else to unblock group from the rules evaluation.
|
||||
// we recreate the evalCtx and g.evalCancel, so it can
|
||||
// be called again.
|
||||
evalCtx, cancel = context.WithCancel(ctx)
|
||||
g.evalCancel = cancel
|
||||
|
||||
err := g.updateWith(ng)
|
||||
if err != nil {
|
||||
logger.Errorf("group %q: failed to update: %s", g.Name, err)
|
||||
@@ -329,6 +378,8 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
// ensure that staleness is tracked or existing rules only
|
||||
e.purgeStaleSeries(g.Rules)
|
||||
|
||||
e.notifierHeaders = g.NotifierHeaders
|
||||
|
||||
if g.Interval != ng.Interval {
|
||||
g.Interval = ng.Interval
|
||||
t.Stop()
|
||||
@@ -343,7 +394,7 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
}
|
||||
evalTS = evalTS.Add((missed + 1) * g.Interval)
|
||||
|
||||
eval(evalTS)
|
||||
eval(evalCtx, evalTS)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -362,8 +413,10 @@ func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Du
|
||||
}
|
||||
|
||||
type executor struct {
|
||||
notifiers func() []notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
notifiers func() []notifier.Notifier
|
||||
notifierHeaders map[string]string
|
||||
|
||||
rw *remotewrite.Client
|
||||
|
||||
previouslySentSeriesToRWMu sync.Mutex
|
||||
// previouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
@@ -417,6 +470,11 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||
|
||||
tss, err := rule.Exec(ctx, ts, limit)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
// the context can be cancelled on graceful shutdown
|
||||
// or on group update. So no need to handle the error as usual.
|
||||
return nil
|
||||
}
|
||||
execErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
||||
}
|
||||
@@ -458,7 +516,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||
for _, nt := range e.notifiers() {
|
||||
wg.Add(1)
|
||||
go func(nt notifier.Notifier) {
|
||||
if err := nt.Send(ctx, alerts); err != nil {
|
||||
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", rule, nt.Addr(), err))
|
||||
}
|
||||
wg.Done()
|
||||
|
||||
@@ -209,7 +209,7 @@ func TestGroupStart(t *testing.T) {
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil)
|
||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
@@ -460,7 +460,7 @@ func TestFaultyRW(t *testing.T) {
|
||||
|
||||
r := &RecordingRule{
|
||||
Name: "test",
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
q: fq,
|
||||
}
|
||||
|
||||
@@ -474,3 +474,31 @@ func TestFaultyRW(t *testing.T) {
|
||||
t.Fatalf("expected to get an error from faulty RW client, got nil instead")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
|
||||
const delay = time.Second * 2
|
||||
fq := &fakeQuerierWithDelay{delay: delay}
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
g := newGroup(groups[0], fq, evalInterval, nil)
|
||||
|
||||
go g.start(context.Background(), nil, nil, nil)
|
||||
|
||||
time.Sleep(evalInterval * 20)
|
||||
|
||||
go func() {
|
||||
g.close()
|
||||
}()
|
||||
|
||||
deadline := time.Tick(delay / 2)
|
||||
select {
|
||||
case <-deadline:
|
||||
t.Fatalf("deadline for close exceeded")
|
||||
case <-g.finishedCh:
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,21 +44,82 @@ func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Qu
|
||||
return fq
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) ([]datasource.Metric, error) {
|
||||
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
|
||||
req, _, err := fq.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) ([]datasource.Metric, *http.Request, error) {
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) (datasource.Result, *http.Request, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
return nil, nil, fq.err
|
||||
return datasource.Result{}, nil, fq.err
|
||||
}
|
||||
cp := make([]datasource.Metric, len(fq.metrics))
|
||||
copy(cp, fq.metrics)
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
return cp, req, nil
|
||||
return datasource.Result{Data: cp}, req, nil
|
||||
}
|
||||
|
||||
type fakeQuerierWithRegistry struct {
|
||||
sync.Mutex
|
||||
registry map[string][]datasource.Metric
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) set(key string, metrics ...datasource.Metric) {
|
||||
fqr.Lock()
|
||||
if fqr.registry == nil {
|
||||
fqr.registry = make(map[string][]datasource.Metric)
|
||||
}
|
||||
fqr.registry[key] = metrics
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) reset() {
|
||||
fqr.Lock()
|
||||
fqr.registry = nil
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
||||
return fqr
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
|
||||
req, _, err := fqr.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (datasource.Result, *http.Request, error) {
|
||||
fqr.Lock()
|
||||
defer fqr.Unlock()
|
||||
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
metrics, ok := fqr.registry[expr]
|
||||
if !ok {
|
||||
return datasource.Result{}, req, nil
|
||||
}
|
||||
cp := make([]datasource.Metric, len(metrics))
|
||||
copy(cp, metrics)
|
||||
return datasource.Result{Data: cp}, req, nil
|
||||
}
|
||||
|
||||
type fakeQuerierWithDelay struct {
|
||||
fakeQuerier
|
||||
delay time.Duration
|
||||
}
|
||||
|
||||
func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (datasource.Result, *http.Request, error) {
|
||||
timer := time.NewTimer(fqd.delay)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-timer.C:
|
||||
}
|
||||
return fqd.fakeQuerier.Query(ctx, expr, ts)
|
||||
}
|
||||
|
||||
func (fqd *fakeQuerierWithDelay) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
||||
return fqd
|
||||
}
|
||||
|
||||
type fakeNotifier struct {
|
||||
@@ -70,7 +131,7 @@ type fakeNotifier struct {
|
||||
|
||||
func (*fakeNotifier) Close() {}
|
||||
func (*fakeNotifier) Addr() string { return "" }
|
||||
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert) error {
|
||||
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert, _ map[string]string) error {
|
||||
fn.Lock()
|
||||
defer fn.Unlock()
|
||||
fn.counter += len(alerts)
|
||||
@@ -94,7 +155,7 @@ type faultyNotifier struct {
|
||||
fakeNotifier
|
||||
}
|
||||
|
||||
func (fn *faultyNotifier) Send(ctx context.Context, _ []notifier.Alert) error {
|
||||
func (fn *faultyNotifier) Send(ctx context.Context, _ []notifier.Alert, _ map[string]string) error {
|
||||
d, ok := ctx.Deadline()
|
||||
if ok {
|
||||
time.Sleep(time.Until(d))
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
@@ -24,52 +26,67 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rulePath = flagutil.NewArrayString("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
rulePath = flagutil.NewArrayString("rule", `Path to the files or http url with alerting and/or recording rules.
|
||||
Supports hierarchical patterns and regexpes.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
-rule="/path/to/file". Path to a single file with alerting rules.
|
||||
-rule="http://<some-server-addr>/path/to/rules". HTTP URL to a page with alerting rules.
|
||||
-rule="dir/*.yaml" -rule="/*.yaml" -rule="gcs://vmalert-rules/tenant_%{TENANT_ID}/prod".
|
||||
-rule="dir/**/*.yaml". Includes all the .yaml files in "dir" subfolders recursively.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
|
||||
Enterprise version of vmalert supports S3 and GCS paths to rules.
|
||||
For example: gs://bucket/path/to/rules, s3://bucket/path/to/rules
|
||||
S3 and GCS paths support only matching by prefix, e.g. s3://bucket/dir/rule_ matches
|
||||
all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
`)
|
||||
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
absolute path to all .tpl files in root.`)
|
||||
absolute path to all .tpl files in root.
|
||||
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
||||
`)
|
||||
|
||||
rulesCheckInterval = flag.Duration("rule.configCheckInterval", 0, "Interval for checking for changes in '-rule' files. "+
|
||||
"By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead")
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead")
|
||||
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
|
||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
"which is by default equal to 3 evaluation intervals of the parent group.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
"which by default is 4 times evaluationInterval of the parent group.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
|
||||
`for cases where you want to build a custom link to Grafana, Prometheus or any other service. `+
|
||||
`Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . `+
|
||||
`For example, link to Grafana: -external.alert.source='explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr":{{$expr|jsonEscape|queryEscape}} },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]' . `+
|
||||
`For example, link to Grafana: -external.alert.source='explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{$expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"now-1h","to":"now"}}'. `+
|
||||
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
||||
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||
"Pass multiple -label flags in order to add multiple label sets.")
|
||||
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup.")
|
||||
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
|
||||
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
|
||||
@@ -90,6 +107,10 @@ func main() {
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
logger.Warnf("flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases.")
|
||||
}
|
||||
|
||||
err := templates.Load(*ruleTemplatesPath, true)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to parse %q: %s", *ruleTemplatesPath, err)
|
||||
@@ -168,7 +189,7 @@ func main() {
|
||||
go configReload(ctx, manager, groupsCfg, sighupCh)
|
||||
|
||||
rh := &requestHandler{m: manager}
|
||||
go httpserver.Serve(*httpListenAddr, rh.handler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, rh.handler)
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
@@ -266,7 +287,10 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
"tpl": externalAlertSource,
|
||||
}
|
||||
return func(alert notifier.Alert) string {
|
||||
templated, err := alert.ExecTemplate(nil, alert.Labels, m)
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
||||
}
|
||||
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
||||
if err != nil {
|
||||
logger.Errorf("can not exec source template %s", err)
|
||||
}
|
||||
@@ -304,6 +328,7 @@ func configReload(ctx context.Context, m *manager, groupsCfg []config.Group, sig
|
||||
// init reload metrics with positive values to improve alerting conditions
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
parseFn := config.Parse
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -315,7 +340,11 @@ func configReload(ctx context.Context, m *manager, groupsCfg []config.Group, sig
|
||||
}
|
||||
logger.Infof("SIGHUP received. Going to reload rules %q %s...", *rulePath, tmplMsg)
|
||||
configReloads.Inc()
|
||||
// allow logs emitting during manual config reload
|
||||
parseFn = config.Parse
|
||||
case <-configCheckCh:
|
||||
// disable logs emitting during per-interval config reload
|
||||
parseFn = config.ParseSilent
|
||||
}
|
||||
if err := notifier.Reload(); err != nil {
|
||||
configReloadErrors.Inc()
|
||||
@@ -330,7 +359,7 @@ func configReload(ctx context.Context, m *manager, groupsCfg []config.Group, sig
|
||||
logger.Errorf("failed to load new templates: %s", err)
|
||||
continue
|
||||
}
|
||||
newGroupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
||||
newGroupsCfg, err := parseFn(*rulePath, validateTplFn, *validateExpressions)
|
||||
if err != nil {
|
||||
configReloadErrors.Inc()
|
||||
configSuccess.Set(0)
|
||||
|
||||
@@ -82,24 +82,18 @@ func (m *manager) close() {
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) error {
|
||||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
return fmt.Errorf("failed to restore ruleState for group %q: %w", group.Name, err)
|
||||
}
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", group.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error {
|
||||
m.wg.Add(1)
|
||||
id := group.ID()
|
||||
id := g.ID()
|
||||
go func() {
|
||||
group.start(ctx, m.notifiers, m.rw)
|
||||
m.wg.Done()
|
||||
defer m.wg.Done()
|
||||
if restore {
|
||||
g.start(ctx, m.notifiers, m.rw, m.rr)
|
||||
} else {
|
||||
g.start(ctx, m.notifiers, m.rw, nil)
|
||||
}
|
||||
}()
|
||||
m.groups[id] = group
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -153,6 +147,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
}
|
||||
for _, ng := range groupsRegistry {
|
||||
if err := m.startGroup(ctx, ng, restore); err != nil {
|
||||
m.groupsMu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -166,6 +161,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
old.updateCh <- new
|
||||
wg.Done()
|
||||
}(item.old, item.new)
|
||||
item.old.interruptEval()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
@@ -180,15 +176,17 @@ func (g *Group) toAPI() APIGroup {
|
||||
// encode as string to avoid rounding
|
||||
ID: fmt.Sprintf("%d", g.ID()),
|
||||
|
||||
Name: g.Name,
|
||||
Type: g.Type.String(),
|
||||
File: g.File,
|
||||
Interval: g.Interval.Seconds(),
|
||||
LastEvaluation: g.LastEvaluation,
|
||||
Concurrency: g.Concurrency,
|
||||
Params: urlValuesToStrings(g.Params),
|
||||
Headers: headersToStrings(g.Headers),
|
||||
Labels: g.Labels,
|
||||
Name: g.Name,
|
||||
Type: g.Type.String(),
|
||||
File: g.File,
|
||||
Interval: g.Interval.Seconds(),
|
||||
LastEvaluation: g.LastEvaluation,
|
||||
Concurrency: g.Concurrency,
|
||||
Params: urlValuesToStrings(g.Params),
|
||||
Headers: headersToStrings(g.Headers),
|
||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||
|
||||
Labels: g.Labels,
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
||||
|
||||
@@ -64,17 +64,18 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := rand.Intn(len(paths))
|
||||
rnd := r.Intn(len(paths))
|
||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||
if err != nil { // update can fail and this is expected
|
||||
continue
|
||||
}
|
||||
_ = m.update(context.Background(), cfg, false)
|
||||
}
|
||||
}()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -41,10 +41,12 @@ type Alert struct {
|
||||
LastSent time.Time
|
||||
// Value stores the value returned from evaluating expression from Expr field
|
||||
Value float64
|
||||
// ID is the unique identifer for the Alert
|
||||
// ID is the unique identifier for the Alert
|
||||
ID uint64
|
||||
// Restored is true if Alert was restored after restart
|
||||
Restored bool
|
||||
// For defines for how long Alert needs to be active to become StateFiring
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
// AlertState type indicates the Alert state
|
||||
@@ -80,6 +82,7 @@ type AlertTplData struct {
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
var tplHeaders = []string{
|
||||
@@ -91,6 +94,7 @@ var tplHeaders = []string{
|
||||
"{{ $alertID := .AlertID }}",
|
||||
"{{ $groupID := .GroupID }}",
|
||||
"{{ $activeAt := .ActiveAt }}",
|
||||
"{{ $for := .For }}",
|
||||
}
|
||||
|
||||
// ExecTemplate executes the Alert template for given
|
||||
@@ -98,12 +102,16 @@ var tplHeaders = []string{
|
||||
// Every alert could have a different datasource, so function
|
||||
// requires a queryFunction as an argument.
|
||||
func (a *Alert) ExecTemplate(q templates.QueryFn, labels, annotations map[string]string) (map[string]string, error) {
|
||||
tplData := AlertTplData{Value: a.Value, Labels: labels, Expr: a.Expr, AlertID: a.ID, GroupID: a.GroupID, ActiveAt: a.ActiveAt}
|
||||
tmpl, err := templates.GetWithFuncs(templates.FuncsWithQuery(q))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting a template: %w", err)
|
||||
tplData := AlertTplData{
|
||||
Value: a.Value,
|
||||
Labels: labels,
|
||||
Expr: a.Expr,
|
||||
AlertID: a.ID,
|
||||
GroupID: a.GroupID,
|
||||
ActiveAt: a.ActiveAt,
|
||||
For: a.For,
|
||||
}
|
||||
return templateAnnotations(annotations, tplData, tmpl, true)
|
||||
return ExecTemplate(q, annotations, tplData)
|
||||
}
|
||||
|
||||
// ExecTemplate executes the given template for given annotations map.
|
||||
@@ -162,6 +170,8 @@ func templateAnnotation(dst io.Writer, text string, data tplData, tmpl *textTpl.
|
||||
if err != nil {
|
||||
return fmt.Errorf("error cloning template before parse annotation: %w", err)
|
||||
}
|
||||
// Clone() doesn't copy tpl Options, so we set them manually
|
||||
tpl = tpl.Option("missingkey=zero")
|
||||
tpl, err = tpl.Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing annotation template: %w", err)
|
||||
|
||||
@@ -54,14 +54,15 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
"job": "staging",
|
||||
"instance": "localhost",
|
||||
},
|
||||
For: 5 * time.Minute,
|
||||
},
|
||||
annotations: map[string]string{
|
||||
"summary": "Too high connection number for {{$labels.instance}} for job {{$labels.job}}",
|
||||
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
||||
"description": "It is {{ $value }} connections for {{$labels.instance}} for more than {{ .For }}",
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"summary": "Too high connection number for localhost for job staging",
|
||||
"description": "It is 10000 connections for localhost",
|
||||
"description": "It is 10000 connections for localhost for more than 5m0s",
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -152,7 +153,7 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ActiveAt custome format",
|
||||
name: "ActiveAt custom format",
|
||||
alert: &Alert{
|
||||
ActiveAt: time.Date(2022, 8, 19, 20, 34, 58, 651387237, time.UTC),
|
||||
},
|
||||
@@ -199,6 +200,9 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if err := ValidateTemplates(tc.annotations); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tpl, err := tc.alert.ExecTemplate(qFn, tc.alert.Labels, tc.annotations)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
@@ -51,24 +51,27 @@ func (am *AlertManager) Close() {
|
||||
func (am AlertManager) Addr() string { return am.addr }
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
am.metrics.alertsSent.Add(len(alerts))
|
||||
err := am.send(ctx, alerts)
|
||||
err := am.send(ctx, alerts, headers)
|
||||
if err != nil {
|
||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert) error {
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
b := &bytes.Buffer{}
|
||||
writeamRequest(b, alerts, am.argFunc, am.relabelConfigs)
|
||||
|
||||
req, err := http.NewRequest("POST", am.addr, b)
|
||||
req, err := http.NewRequest(http.MethodPost, am.addr, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
for key, value := range headers {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
|
||||
if am.timeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
@@ -105,7 +108,8 @@ const alertManagerPath = "/api/v2/alerts"
|
||||
|
||||
// NewAlertManager is a constructor for AlertManager
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration) (*AlertManager, error) {
|
||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
||||
) (*AlertManager, error) {
|
||||
tls := &promauth.TLSConfig{}
|
||||
if authCfg.TLSConfig != nil {
|
||||
tls = authCfg.TLSConfig
|
||||
|
||||
@@ -25,6 +25,7 @@ func TestAlertManager_Addr(t *testing.T) {
|
||||
|
||||
func TestAlertManager_Send(t *testing.T) {
|
||||
const baUser, baPass = "foo", "bar"
|
||||
const headerKey, headerValue = "TenantID", "foo"
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Errorf("should not be called")
|
||||
@@ -73,6 +74,11 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
if a[0].EndAt.IsZero() {
|
||||
t.Errorf("expected non-zero end time")
|
||||
}
|
||||
case 3:
|
||||
if r.Header.Get(headerKey) != headerValue {
|
||||
t.Errorf("expected header %q to be set to %q; got %q instead",
|
||||
headerKey, headerValue, r.Header.Get(headerKey))
|
||||
}
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
@@ -90,10 +96,10 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %s", err)
|
||||
}
|
||||
if err := am.Send(context.Background(), []Alert{{}, {}}); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{}, {}}, nil); err == nil {
|
||||
t.Error("expected connection error got nil")
|
||||
}
|
||||
if err := am.Send(context.Background(), []Alert{}); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{}, nil); err == nil {
|
||||
t.Error("expected wrong http code error got nil")
|
||||
}
|
||||
if err := am.Send(context.Background(), []Alert{{
|
||||
@@ -102,10 +108,13 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||
}}); err != nil {
|
||||
}}, nil); err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if c != 2 {
|
||||
t.Errorf("expected 2 calls(count from zero) to server got %d", c)
|
||||
}
|
||||
if err := am.Send(context.Background(), nil, map[string]string{headerKey: headerValue}); err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ type Config struct {
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||
// DNSSDConfigs ontains list of settings for service discovery via DNS.
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||
|
||||
|
||||
@@ -162,14 +162,15 @@ consul_sd_configs:
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := rand.Intn(len(paths))
|
||||
rnd := r.Intn(len(paths))
|
||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||
_ = cw.notifiers()
|
||||
}
|
||||
}()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@ var (
|
||||
configPath = flag.String("notifier.config", "", "Path to configuration file for notifiers")
|
||||
suppressDuplicateTargetErrors = flag.Bool("notifier.suppressDuplicateTargetErrors", false, "Whether to suppress 'duplicate target' errors during discovery")
|
||||
|
||||
addrs = flagutil.NewArrayString("notifier.url", "Prometheus alertmanager URL, e.g. http://127.0.0.1:9093")
|
||||
addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
|
||||
"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
@@ -30,9 +31,9 @@ var (
|
||||
tlsCertFile = flagutil.NewArrayString("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArrayString("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("notifier.tlsServerName", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
"By default, the server name from -notifier.url is used")
|
||||
|
||||
oauth2ClientID = flagutil.NewArrayString("notifier.oauth2.clientID", "Optional OAuth2 clientID to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
@@ -80,10 +81,6 @@ var (
|
||||
//
|
||||
// Init returns an error if both mods are used.
|
||||
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
|
||||
if externalLabels != nil || externalURL != "" {
|
||||
return nil, fmt.Errorf("BUG: notifier.Init was called multiple times")
|
||||
}
|
||||
|
||||
externalURL = extURL
|
||||
externalLabels = extLabels
|
||||
eu, err := url.Parse(externalURL)
|
||||
|
||||
37
app/vmalert/notifier/init_test.go
Normal file
37
app/vmalert/notifier/init_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInit(t *testing.T) {
|
||||
oldAddrs := *addrs
|
||||
defer func() { *addrs = oldAddrs }()
|
||||
|
||||
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
||||
|
||||
fn, err := Init(nil, nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
nfs := fn()
|
||||
if len(nfs) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
|
||||
}
|
||||
|
||||
targets := GetTargets()
|
||||
if targets == nil || targets[TargetStatic] == nil {
|
||||
t.Fatalf("expected to get static targets in response")
|
||||
}
|
||||
|
||||
nf1 := targets[TargetStatic][0]
|
||||
if nf1.Addr() != "127.0.0.1/api/v2/alerts" {
|
||||
t.Fatalf("expected to get \"127.0.0.1/api/v2/alerts\"; got %q instead", nf1.Addr())
|
||||
}
|
||||
nf2 := targets[TargetStatic][1]
|
||||
if nf2.Addr() != "127.0.0.2/api/v2/alerts" {
|
||||
t.Fatalf("expected to get \"127.0.0.2/api/v2/alerts\"; got %q instead", nf2.Addr())
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,7 @@ type Notifier interface {
|
||||
// Send sends the given list of alerts.
|
||||
// Returns an error if fails to send the alerts.
|
||||
// Must unblock if the given ctx is cancelled.
|
||||
Send(ctx context.Context, alerts []Alert) error
|
||||
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
|
||||
// Addr returns address where alerts are sent.
|
||||
Addr() string
|
||||
// Close is a destructor for the Notifier
|
||||
|
||||
@@ -58,7 +58,6 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
state: newRuleState(),
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
@@ -67,6 +66,12 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
}),
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
rr.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
@@ -94,13 +99,13 @@ func (rr *RecordingRule) Close() {
|
||||
// It doesn't update internal states of the Rule and meant to be used just
|
||||
// to get time series for backfilling.
|
||||
func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
series, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
|
||||
res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
duplicates := make(map[string]struct{}, len(series))
|
||||
duplicates := make(map[string]struct{}, len(res.Data))
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
for _, s := range series {
|
||||
for _, s := range res.Data {
|
||||
ts := rr.toTimeSeries(s)
|
||||
key := stringifyLabels(ts)
|
||||
if _, ok := duplicates[key]; ok {
|
||||
@@ -115,13 +120,14 @@ func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([
|
||||
// Exec executes RecordingRule expression via the given Querier.
|
||||
func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
qMetrics, req, err := rr.q.Query(ctx, rr.Expr, ts)
|
||||
res, req, err := rr.q.Query(ctx, rr.Expr, ts)
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(qMetrics),
|
||||
curl: requestToCurl(req),
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(res.Data),
|
||||
seriesFetched: res.SeriesFetched,
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
@@ -133,6 +139,7 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
return nil, curState.err
|
||||
}
|
||||
|
||||
qMetrics := res.Data
|
||||
numSeries := len(qMetrics)
|
||||
if limit > 0 && numSeries > limit {
|
||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
|
||||
@@ -203,16 +210,18 @@ func (rr *RecordingRule) UpdateWith(r Rule) error {
|
||||
func (rr *RecordingRule) ToAPI() APIRule {
|
||||
lastState := rr.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "recording",
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
Labels: rr.Labels,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.samples,
|
||||
Updates: rr.state.getAll(),
|
||||
Type: "recording",
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
Labels: rr.Labels,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.samples,
|
||||
LastSeriesFetched: lastState.seriesFetched,
|
||||
MaxUpdates: rr.state.size(),
|
||||
Updates: rr.state.getAll(),
|
||||
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
|
||||
@@ -19,7 +19,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
expTS []prompbmarshal.TimeSeries
|
||||
}{
|
||||
{
|
||||
&RecordingRule{Name: "foo", state: newRuleState()},
|
||||
&RecordingRule{Name: "foo"},
|
||||
[]datasource.Metric{metricWithValueAndLabels(t, 10,
|
||||
"__name__", "bar",
|
||||
)},
|
||||
@@ -30,7 +30,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
&RecordingRule{Name: "foobarbaz", state: newRuleState()},
|
||||
&RecordingRule{Name: "foobarbaz"},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
|
||||
@@ -53,8 +53,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
{
|
||||
&RecordingRule{
|
||||
Name: "job:foo",
|
||||
state: newRuleState(),
|
||||
Name: "job:foo",
|
||||
Labels: map[string]string{
|
||||
"source": "test",
|
||||
}},
|
||||
@@ -80,6 +79,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(tc.metrics...)
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected Exec err: %s", err)
|
||||
@@ -198,7 +198,7 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||
}
|
||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(), Labels: map[string]string{
|
||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
}}
|
||||
var err error
|
||||
@@ -216,7 +216,7 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
rr := &RecordingRule{
|
||||
Name: "job:foo",
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
Labels: map[string]string{
|
||||
"job": "test",
|
||||
},
|
||||
|
||||
@@ -34,9 +34,9 @@ var (
|
||||
tlsCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
|
||||
tlsKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
|
||||
tlsCAFile = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
|
||||
"By default system CA is used")
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
|
||||
"By default the server name from -remoteRead.url is used")
|
||||
"By default, the server name from -remoteRead.url is used")
|
||||
|
||||
oauth2ClientID = flag.String("remoteRead.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteRead.url.")
|
||||
oauth2ClientSecret = flag.String("remoteRead.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteRead.url.")
|
||||
|
||||
@@ -29,7 +29,7 @@ var (
|
||||
bearerTokenFile = flag.String("remoteWrite.bearerTokenFile", "", "Optional path to bearer token file to use for -remoteWrite.url.")
|
||||
|
||||
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines max number of timeseries to be flushed at once")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
|
||||
|
||||
@@ -37,9 +37,9 @@ var (
|
||||
tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
|
||||
tlsKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
|
||||
tlsCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
oauth2ClientID = flag.String("remoteWrite.oauth2.clientID", "", "Optional OAuth2 clientID to use for -remoteWrite.url.")
|
||||
oauth2ClientSecret = flag.String("remoteWrite.oauth2.clientSecret", "", "Optional OAuth2 clientSecret to use for -remoteWrite.url.")
|
||||
|
||||
20
app/vmalert/remotewrite/init_test.go
Normal file
20
app/vmalert/remotewrite/init_test.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInit(t *testing.T) {
|
||||
oldAddr := *addr
|
||||
defer func() { *addr = oldAddr }()
|
||||
|
||||
*addr = "http://localhost:8428"
|
||||
cl, err := Init(context.Background())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := cl.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
@@ -186,7 +186,7 @@ var (
|
||||
)
|
||||
|
||||
// flush is a blocking function that marshals WriteRequest and sends
|
||||
// it to remote write endpoint. Flush performs limited amount of retries
|
||||
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||
// if request fails.
|
||||
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
@@ -201,9 +201,14 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
return
|
||||
}
|
||||
|
||||
const attempts = 5
|
||||
b := snappy.Encode(nil, data)
|
||||
for i := 0; i < attempts; i++ {
|
||||
|
||||
const (
|
||||
retryCount = 5
|
||||
retryBackoff = time.Second
|
||||
)
|
||||
|
||||
for attempts := 0; attempts < retryCount; attempts++ {
|
||||
err := c.send(ctx, b)
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
@@ -211,21 +216,34 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Warnf("attempt %d to send request failed: %s", i+1, err)
|
||||
_, isRetriable := err.(*retriableError)
|
||||
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, isRetriable)
|
||||
|
||||
if !isRetriable {
|
||||
// exit fast if error isn't retriable
|
||||
break
|
||||
}
|
||||
|
||||
// check if request has been cancelled before backoff
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break
|
||||
default:
|
||||
}
|
||||
|
||||
// sleeping to avoid remote db hammering
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
time.Sleep(retryBackoff)
|
||||
}
|
||||
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
logger.Errorf("all %d attempts to send request failed - dropping %d time series",
|
||||
attempts, len(wr.Timeseries))
|
||||
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||
len(wr.Timeseries))
|
||||
}
|
||||
|
||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
r := bytes.NewReader(data)
|
||||
req, err := http.NewRequest("POST", c.addr, r)
|
||||
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
@@ -249,10 +267,31 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
req.URL.Redacted(), err, len(data), r.Size())
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
|
||||
// according to https://prometheus.io/docs/concepts/remote_write_spec/
|
||||
// Prometheus remote Write compatible receivers MUST
|
||||
switch resp.StatusCode / 100 {
|
||||
case 2:
|
||||
// respond with a HTTP 2xx status code when the write is successful.
|
||||
return nil
|
||||
case 5:
|
||||
// respond with HTTP status code 5xx when the write fails and SHOULD be retried.
|
||||
return &retriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL.Redacted(), body)}
|
||||
default:
|
||||
// respond with HTTP status code 4xx when the request is invalid, will never be able to succeed
|
||||
// and should not be retried.
|
||||
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL.Redacted(), body)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type retriableError struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func (e *retriableError) Error() string {
|
||||
return e.err.Error()
|
||||
}
|
||||
|
||||
@@ -27,12 +27,13 @@ func TestClient_Push(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create client: %s", err)
|
||||
}
|
||||
r := rand.New(rand.NewSource(1))
|
||||
const rowsN = 1e4
|
||||
var sent int
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompbmarshal.TimeSeries{
|
||||
Samples: []prompbmarshal.Sample{{
|
||||
Value: rand.Float64(),
|
||||
Value: r.Float64(),
|
||||
Timestamp: time.Now().Unix(),
|
||||
}},
|
||||
}
|
||||
|
||||
@@ -22,11 +22,11 @@ var (
|
||||
replayTo = flag.String("replay.timeTo", "",
|
||||
"The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'")
|
||||
replayRulesDelay = flag.Duration("replay.rulesDelay", time.Second,
|
||||
"Delay between rules evaluation within the group. Could be important if there are chained rules inside of the group"+
|
||||
"Delay between rules evaluation within the group. Could be important if there are chained rules inside the group "+
|
||||
"and processing need to wait for previous rule results to be persisted by remote storage before evaluating the next rule."+
|
||||
"Keep it equal or bigger than -remoteWrite.flushInterval.")
|
||||
replayMaxDatapoints = flag.Int("replay.maxDatapointsPerQuery", 1e3,
|
||||
"Max number of data points expected in one request. The higher the value, the less requests will be made during replay.")
|
||||
"Max number of data points expected in one request. It affects the max time range for every `/query_range` request during the replay. The higher the value, the less requests will be made during replay.")
|
||||
replayRuleRetryAttempts = flag.Int("replay.ruleRetryAttempts", 5,
|
||||
"Defines how many retries to make before giving up on rule if request for it returns an error.")
|
||||
disableProgressBar = flag.Bool("replay.disableProgressBar", false, "Whether to disable rendering progress bars during the replay. "+
|
||||
|
||||
@@ -20,21 +20,21 @@ func (fr *fakeReplayQuerier) BuildWithParams(_ datasource.QuerierParams) datasou
|
||||
return fr
|
||||
}
|
||||
|
||||
func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to time.Time) ([]datasource.Metric, error) {
|
||||
func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to time.Time) (res datasource.Result, err error) {
|
||||
key := fmt.Sprintf("%s+%s", from.Format("15:04:05"), to.Format("15:04:05"))
|
||||
dps, ok := fr.registry[q]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unexpected query received: %q", q)
|
||||
return res, fmt.Errorf("unexpected query received: %q", q)
|
||||
}
|
||||
_, ok = dps[key]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unexpected time range received: %q", key)
|
||||
return res, fmt.Errorf("unexpected time range received: %q", key)
|
||||
}
|
||||
delete(dps, key)
|
||||
if len(fr.registry[q]) < 1 {
|
||||
delete(fr.registry, q)
|
||||
}
|
||||
return nil, nil
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func TestReplay(t *testing.T) {
|
||||
|
||||
@@ -53,15 +53,22 @@ type ruleStateEntry struct {
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
samples int
|
||||
// stores the number of time series fetched during
|
||||
// the last evaluation.
|
||||
// Is supported by VictoriaMetrics only, starting from v1.90.0
|
||||
// If seriesFetched == nil, then this attribute was missing in
|
||||
// datasource response (unsupported).
|
||||
seriesFetched *int
|
||||
// stores the curl command reflecting the HTTP request used during rule.Exec
|
||||
curl string
|
||||
}
|
||||
|
||||
const defaultStateEntriesLimit = 20
|
||||
|
||||
func newRuleState() *ruleState {
|
||||
func newRuleState(size int) *ruleState {
|
||||
if size < 1 {
|
||||
size = 1
|
||||
}
|
||||
return &ruleState{
|
||||
entries: make([]ruleStateEntry, defaultStateEntriesLimit),
|
||||
entries: make([]ruleStateEntry, size),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,6 +78,12 @@ func (s *ruleState) getLast() ruleStateEntry {
|
||||
return s.entries[s.cur]
|
||||
}
|
||||
|
||||
func (s *ruleState) size() int {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
return len(s.entries)
|
||||
}
|
||||
|
||||
func (s *ruleState) getAll() []ruleStateEntry {
|
||||
entries := make([]ruleStateEntry, 0)
|
||||
|
||||
|
||||
@@ -6,8 +6,27 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRule_stateDisabled(t *testing.T) {
|
||||
state := newRuleState(-1)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
}
|
||||
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
|
||||
if len(state.getAll()) != 1 {
|
||||
// state should store at least one update at any circumstances
|
||||
t.Fatalf("expected for state to have %d entries; got %d",
|
||||
1, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
func TestRule_state(t *testing.T) {
|
||||
state := newRuleState()
|
||||
stateEntriesN := 20
|
||||
state := newRuleState(stateEntriesN)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
@@ -39,7 +58,7 @@ func TestRule_state(t *testing.T) {
|
||||
}
|
||||
|
||||
var last time.Time
|
||||
for i := 0; i < defaultStateEntriesLimit*2; i++ {
|
||||
for i := 0; i < stateEntriesN*2; i++ {
|
||||
last = time.Now()
|
||||
state.add(ruleStateEntry{at: last})
|
||||
}
|
||||
@@ -50,9 +69,9 @@ func TestRule_state(t *testing.T) {
|
||||
e.at, last)
|
||||
}
|
||||
|
||||
if len(state.getAll()) != defaultStateEntriesLimit {
|
||||
if len(state.getAll()) != stateEntriesN {
|
||||
t.Fatalf("expected for state to have %d entries only; got %d",
|
||||
defaultStateEntriesLimit, len(state.getAll()),
|
||||
stateEntriesN, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -61,7 +80,7 @@ func TestRule_state(t *testing.T) {
|
||||
// execution of state updates.
|
||||
// Should be executed with -race flag
|
||||
func TestRule_stateConcurrent(t *testing.T) {
|
||||
state := newRuleState()
|
||||
state := newRuleState(20)
|
||||
|
||||
const workers = 50
|
||||
const iterations = 100
|
||||
|
||||
39
app/vmalert/static/js/custom.js
Normal file
39
app/vmalert/static/js/custom.js
Normal file
@@ -0,0 +1,39 @@
|
||||
function expandAll() {
|
||||
$('.collapse').addClass('show');
|
||||
}
|
||||
|
||||
function collapseAll() {
|
||||
$('.collapse').removeClass('show');
|
||||
}
|
||||
|
||||
function toggleByID(id) {
|
||||
let el = $("#" + id);
|
||||
if (el.length > 0) {
|
||||
el.click();
|
||||
}
|
||||
}
|
||||
|
||||
$(document).ready(function () {
|
||||
$(".group-heading a").click(function (e) {
|
||||
e.stopPropagation(); // prevent collapse logic on link click
|
||||
let target = $(this).attr('href');
|
||||
if (target.length > 0) {
|
||||
toggleByID(target.substr(1));
|
||||
}
|
||||
});
|
||||
|
||||
$(".group-heading").click(function (e) {
|
||||
let target = $(this).attr('data-bs-target');
|
||||
let el = $("#" + target);
|
||||
new bootstrap.Collapse(el, {
|
||||
toggle: true
|
||||
});
|
||||
});
|
||||
|
||||
let hash = window.location.hash.substr(1);
|
||||
toggleByID(hash);
|
||||
});
|
||||
|
||||
$(document).ready(function () {
|
||||
$('[data-bs-toggle="tooltip"]').tooltip();
|
||||
});
|
||||
@@ -21,17 +21,18 @@ import (
|
||||
"math"
|
||||
"net"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
textTpl "text/template"
|
||||
"time"
|
||||
|
||||
textTpl "text/template"
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/formatutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
@@ -59,12 +60,12 @@ func Load(pathPatterns []string, overwrite bool) error {
|
||||
var err error
|
||||
tmpl := newTemplate()
|
||||
for _, tp := range pathPatterns {
|
||||
p, err := filepath.Glob(tp)
|
||||
p, err := doublestar.FilepathGlob(tp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve a template glob %q: %w", tp, err)
|
||||
}
|
||||
if len(p) > 0 {
|
||||
tmpl, err = tmpl.ParseGlob(tp)
|
||||
tmpl, err = tmpl.ParseFiles(p...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse template glob %q: %w", tp, err)
|
||||
}
|
||||
@@ -182,6 +183,10 @@ func Get() (*textTpl.Template, error) {
|
||||
func FuncsWithQuery(query QueryFn) textTpl.FuncMap {
|
||||
return textTpl.FuncMap{
|
||||
"query": func(q string) ([]metric, error) {
|
||||
if query == nil {
|
||||
return nil, fmt.Errorf("cannot execute query %q: query is not available in this context", q)
|
||||
}
|
||||
|
||||
result, err := query(q)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -225,7 +230,7 @@ func templateFuncs() textTpl.FuncMap {
|
||||
"toLower": strings.ToLower,
|
||||
|
||||
// crlfEscape replaces '\n' and '\r' chars with `\\n` and `\\r`.
|
||||
// This funcion is deprectated.
|
||||
// This function is deprecated.
|
||||
//
|
||||
// It is better to use quotesEscape, jsonEscape, queryEscape or pathEscape instead -
|
||||
// these functions properly escape `\n` and `\r` chars according to their purpose.
|
||||
@@ -350,18 +355,10 @@ func templateFuncs() textTpl.FuncMap {
|
||||
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return fmt.Sprintf("%.4g", v), nil
|
||||
}
|
||||
prefix := ""
|
||||
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
|
||||
if math.Abs(v) < 1024 {
|
||||
break
|
||||
}
|
||||
prefix = p
|
||||
v /= 1024
|
||||
}
|
||||
return fmt.Sprintf("%.4g%s", v, prefix), nil
|
||||
return formatutil.HumanizeBytes(v), nil
|
||||
},
|
||||
|
||||
// humanizeDuration converts given seconds to a human readable duration
|
||||
// humanizeDuration converts given seconds to a human-readable duration
|
||||
"humanizeDuration": func(i interface{}) (string, error) {
|
||||
v, err := toFloat64(i)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package templates
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"testing"
|
||||
textTpl "text/template"
|
||||
@@ -50,6 +51,45 @@ func TestTemplateFuncs(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatalf("unexpected mismatch")
|
||||
}
|
||||
|
||||
formatting := func(funcName string, p interface{}, resultExpected string) {
|
||||
t.Helper()
|
||||
v := funcs[funcName]
|
||||
fLocal := v.(func(s interface{}) (string, error))
|
||||
result, err := fLocal(p)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for %s(%f): %s", funcName, p, err)
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %s(%f); got\n%s\nwant\n%s", funcName, p, result, resultExpected)
|
||||
}
|
||||
}
|
||||
formatting("humanize1024", float64(0), "0")
|
||||
formatting("humanize1024", math.Inf(0), "+Inf")
|
||||
formatting("humanize1024", math.NaN(), "NaN")
|
||||
formatting("humanize1024", float64(127087), "124.1ki")
|
||||
formatting("humanize1024", float64(130137088), "124.1Mi")
|
||||
formatting("humanize1024", float64(133260378112), "124.1Gi")
|
||||
formatting("humanize1024", float64(136458627186688), "124.1Ti")
|
||||
formatting("humanize1024", float64(139733634239168512), "124.1Pi")
|
||||
formatting("humanize1024", float64(143087241460908556288), "124.1Ei")
|
||||
formatting("humanize1024", float64(146521335255970361638912), "124.1Zi")
|
||||
formatting("humanize1024", float64(150037847302113650318245888), "124.1Yi")
|
||||
formatting("humanize1024", float64(153638755637364377925883789312), "1.271e+05Yi")
|
||||
|
||||
formatting("humanize", float64(127087), "127.1k")
|
||||
formatting("humanize", float64(136458627186688), "136.5T")
|
||||
|
||||
formatting("humanizeDuration", 1, "1s")
|
||||
formatting("humanizeDuration", 0.2, "200ms")
|
||||
formatting("humanizeDuration", 42000, "11h 40m 0s")
|
||||
formatting("humanizeDuration", 16790555, "194d 8h 2m 35s")
|
||||
|
||||
formatting("humanizePercentage", 1, "100%")
|
||||
formatting("humanizePercentage", 0.8, "80%")
|
||||
formatting("humanizePercentage", 0.015, "1.5%")
|
||||
|
||||
formatting("humanizeTimestamp", 1679055557, "2023-03-17 12:19:17 +0000 UTC")
|
||||
}
|
||||
|
||||
func mkTemplate(current, replacement interface{}) textTemplate {
|
||||
|
||||
@@ -10,35 +10,7 @@
|
||||
</main>
|
||||
<script src="{%s prefix %}static/js/jquery-3.6.0.min.js" type="text/javascript"></script>
|
||||
<script src="{%s prefix %}static/js/bootstrap.bundle.min.js" type="text/javascript"></script>
|
||||
<script type="text/javascript">
|
||||
function expandAll() {
|
||||
$('.collapse').addClass('show');
|
||||
}
|
||||
function collapseAll() {
|
||||
$('.collapse').removeClass('show');
|
||||
}
|
||||
|
||||
$(document).ready(function() {
|
||||
// prevent collapse logic on link click
|
||||
$(".group-heading a").click(function(e) {
|
||||
e.stopPropagation();
|
||||
});
|
||||
|
||||
$(".group-heading").click(function(e) {
|
||||
let target = $(this).attr('data-bs-target');
|
||||
let el = $("#"+target);
|
||||
new bootstrap.Collapse(el, {
|
||||
toggle: true
|
||||
});
|
||||
});
|
||||
|
||||
var hash = window.location.hash.substr(1);
|
||||
let group = $("#"+hash);
|
||||
if (group.length > 0) {
|
||||
group.click();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<script src="{%s prefix %}static/js/custom.js" type="text/javascript"></script>
|
||||
</body>
|
||||
</html>
|
||||
{% endfunc %}
|
||||
|
||||
@@ -45,63 +45,39 @@ func StreamFooter(qw422016 *qt422016.Writer, r *http.Request) {
|
||||
qw422016.E().S(prefix)
|
||||
//line app/vmalert/tpl/footer.qtpl:12
|
||||
qw422016.N().S(`static/js/bootstrap.bundle.min.js" type="text/javascript"></script>
|
||||
<script type="text/javascript">
|
||||
function expandAll() {
|
||||
$('.collapse').addClass('show');
|
||||
}
|
||||
function collapseAll() {
|
||||
$('.collapse').removeClass('show');
|
||||
}
|
||||
|
||||
$(document).ready(function() {
|
||||
// prevent collapse logic on link click
|
||||
$(".group-heading a").click(function(e) {
|
||||
e.stopPropagation();
|
||||
});
|
||||
|
||||
$(".group-heading").click(function(e) {
|
||||
let target = $(this).attr('data-bs-target');
|
||||
let el = $("#"+target);
|
||||
new bootstrap.Collapse(el, {
|
||||
toggle: true
|
||||
});
|
||||
});
|
||||
|
||||
var hash = window.location.hash.substr(1);
|
||||
let group = $("#"+hash);
|
||||
if (group.length > 0) {
|
||||
group.click();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<script src="`)
|
||||
//line app/vmalert/tpl/footer.qtpl:13
|
||||
qw422016.E().S(prefix)
|
||||
//line app/vmalert/tpl/footer.qtpl:13
|
||||
qw422016.N().S(`static/js/custom.js" type="text/javascript"></script>
|
||||
</body>
|
||||
</html>
|
||||
`)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
func WriteFooter(qq422016 qtio422016.Writer, r *http.Request) {
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
StreamFooter(qw422016, r)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
func Footer(r *http.Request) string {
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
WriteFooter(qb422016, r)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
return qs422016
|
||||
//line app/vmalert/tpl/footer.qtpl:44
|
||||
//line app/vmalert/tpl/footer.qtpl:16
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
switch r.URL.Path {
|
||||
case "/", "/vmalert", "/vmalert/":
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
httpserver.Errorf(w, r, "path %q supports only GET method", r.URL.Path)
|
||||
return false
|
||||
}
|
||||
@@ -211,7 +211,7 @@ func (rh *requestHandler) groups() []APIGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
var groups []APIGroup
|
||||
groups := make([]APIGroup, 0)
|
||||
for _, g := range rh.m.groups {
|
||||
groups = append(groups, g.toAPI())
|
||||
}
|
||||
@@ -276,6 +276,7 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*APIAlert, 0)
|
||||
for _, g := range rh.m.groups {
|
||||
for _, r := range g.Rules {
|
||||
a, ok := r.(*AlertingRule)
|
||||
|
||||
@@ -30,102 +30,132 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []APIGroup) %}
|
||||
{% func buttonActive(filter, expValue string) %}
|
||||
{% if filter != expValue %}
|
||||
btn-secondary
|
||||
{% else %}
|
||||
btn-primary
|
||||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, originGroups []APIGroup) %}
|
||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "Groups") %}
|
||||
{% if len(groups) > 0 %}
|
||||
{%code
|
||||
filter := r.URL.Query().Get("filter")
|
||||
rOk := make(map[string]int)
|
||||
rNotOk := make(map[string]int)
|
||||
for _, g := range groups {
|
||||
rNoMatch := make(map[string]int)
|
||||
var groups []APIGroup
|
||||
for _, g := range originGroups {
|
||||
var rules []APIRule
|
||||
for _, r := range g.Rules {
|
||||
if r.LastError != "" {
|
||||
rNotOk[g.Name]++
|
||||
rNotOk[g.ID]++
|
||||
} else {
|
||||
rOk[g.Name]++
|
||||
rOk[g.ID]++
|
||||
}
|
||||
if isNoMatch(r) {
|
||||
rNoMatch[g.ID]++
|
||||
}
|
||||
if (filter == "unhealthy" && r.LastError == "") ||
|
||||
(filter == "noMatch" && !isNoMatch(r)) {
|
||||
continue
|
||||
}
|
||||
rules = append(rules, r)
|
||||
}
|
||||
if len(rules) > 0 {
|
||||
g.Rules = rules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
}
|
||||
%}
|
||||
<a class="btn {%= buttonActive(filter, "") %}" role="button" onclick="window.location = window.location.pathname">All</a>
|
||||
<a class="btn btn-primary" role="button" onclick="collapseAll()">Collapse All</a>
|
||||
<a class="btn btn-primary" role="button" onclick="expandAll()">Expand All</a>
|
||||
{% for _, g := range groups %}
|
||||
<div class="group-heading{% if rNotOk[g.Name] > 0 %} alert-danger{% endif %}" data-bs-target="rules-{%s g.ID %}">
|
||||
<span class="anchor" id="group-{%s g.ID %}"></span>
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s)</a>
|
||||
{% if rNotOk[g.Name] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d rNotOk[g.Name] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules withs status Ok">{%d rOk[g.Name] %}</span>
|
||||
<p class="fs-6 fw-lighter">{%s g.File %}</p>
|
||||
{% if len(g.Params) > 0 %}
|
||||
<div class="fs-6 fw-lighter">Extra params
|
||||
{% for _, param := range g.Params %}
|
||||
<span class="float-left badge bg-primary">{%s param %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if len(g.Headers) > 0 %}
|
||||
<div class="fs-6 fw-lighter">Extra headers
|
||||
{% for _, header := range g.Headers %}
|
||||
<span class="float-left badge bg-primary">{%s header %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="collapse" id="rules-{%s g.ID %}">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many samples were produced by the rule">Samples</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for _, r := range g.Rules %}
|
||||
<tr{% if r.LastError != "" %} class="alert-danger"{% endif %}>
|
||||
<td>
|
||||
<div class="row">
|
||||
<div class="col-12 mb-2">
|
||||
{% if r.Type == "alerting" %}
|
||||
<b>alert:</b> {%s r.Name %} (for: {%v r.Duration %} seconds)
|
||||
{% else %}
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
| <span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
<code><pre>{%s r.Query %}</pre></code>
|
||||
</div>
|
||||
<div class="col-12 mb-2">
|
||||
{% if len(r.Labels) > 0 %} <b>Labels:</b>{% endif %}
|
||||
{% for k, v := range r.Labels %}
|
||||
<span class="ms-1 badge bg-primary">{%s k %}={%s v %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if r.LastError != "" %}
|
||||
<div class="col-12">
|
||||
<b>Error:</b>
|
||||
<div class="error-cell">
|
||||
{%s r.LastError %}
|
||||
<a class="btn {%= buttonActive(filter, "unhealthy") %}" role="button" onclick="location.href='?filter=unhealthy'" title="Show only rules with errors">Unhealthy</a>
|
||||
<a class="btn {%= buttonActive(filter, "noMatch") %}" role="button" onclick="location.href='?filter=noMatch'" title="Show only rules matching no time series during last evaluation">NoMatch</a>
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div
|
||||
class="group-heading{% if rNotOk[g.ID] > 0 %} alert-danger{%endif%}" data-bs-target="rules-{%s g.ID %}">
|
||||
<span class="anchor" id="group-{%s g.ID %}"></span>
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||
{% if rNotOk[g.ID] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d rNotOk[g.ID] %}</span> {% endif %}
|
||||
{% if rNoMatch[g.ID] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d rNoMatch[g.ID] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules withs status Ok">{%d rOk[g.ID] %}</span>
|
||||
<p class="fs-6 fw-lighter">{%s g.File %}</p>
|
||||
{% if len(g.Params) > 0 %}
|
||||
<div class="fs-6 fw-lighter">Extra params
|
||||
{% for _, param := range g.Params %}
|
||||
<span class="float-left badge bg-primary">{%s param %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if len(g.Headers) > 0 %}
|
||||
<div class="fs-6 fw-lighter">Extra headers
|
||||
{% for _, header := range g.Headers %}
|
||||
<span class="float-left badge bg-primary">{%s header %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="collapse" id="rules-{%s g.ID %}">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many samples were produced by the rule">Samples</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for _, r := range g.Rules %}
|
||||
<tr{% if r.LastError != "" %} class="alert-danger"{% endif %}>
|
||||
<td>
|
||||
<div class="row">
|
||||
<div class="col-12 mb-2">
|
||||
{% if r.Type == "alerting" %}
|
||||
<b>alert:</b> {%s r.Name %} (for: {%v r.Duration %} seconds)
|
||||
{% else %}
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
|
|
||||
{%= seriesFetchedWarn(r) %}
|
||||
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
<code><pre>{%s r.Query %}</pre></code>
|
||||
</div>
|
||||
<div class="col-12 mb-2">
|
||||
{% if len(r.Labels) > 0 %} <b>Labels:</b>{% endif %}
|
||||
{% for k, v := range r.Labels %}
|
||||
<span class="ms-1 badge bg-primary">{%s k %}={%s v %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if r.LastError != "" %}
|
||||
<div class="col-12">
|
||||
<b>Error:</b>
|
||||
<div class="error-cell">
|
||||
{%s r.LastError %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</td>
|
||||
<td class="text-center">{%d r.LastSamples %}</td>
|
||||
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</td>
|
||||
<td class="text-center">{%d r.LastSamples %}</td>
|
||||
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<div>
|
||||
<p>No groups...</p>
|
||||
</div>
|
||||
{% endfor %}
|
||||
|
||||
{% else %}
|
||||
<div>
|
||||
<p>No groups...</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{%= tpl.Footer(r) %}
|
||||
|
||||
@@ -237,9 +267,9 @@
|
||||
{%code typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
||||
count := len(ns)
|
||||
%}
|
||||
<div class="group-heading data-bs-target="rules-{%s typeK %}">
|
||||
<span class="anchor" id="notifiers-{%s typeK %}"></span>
|
||||
<a href="#notifiers-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||
<div class="group-heading" data-bs-target="notifiers-{%s typeK %}">
|
||||
<span class="anchor" id="group-{%s typeK %}"></span>
|
||||
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||
</div>
|
||||
<div class="collapse show" id="notifiers-{%s typeK %}">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
@@ -377,8 +407,20 @@
|
||||
annotationKeys = append(annotationKeys, k)
|
||||
}
|
||||
sort.Strings(annotationKeys)
|
||||
|
||||
var seriesFetchedEnabled bool
|
||||
var seriesFetchedWarning bool
|
||||
for _, u := range rule.Updates {
|
||||
if u.seriesFetched != nil {
|
||||
seriesFetchedEnabled = true
|
||||
if *u.seriesFetched == 0 && u.samples == 0{
|
||||
seriesFetchedWarning = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
%}
|
||||
<div class="display-6 pb-3 mb-3">Rule: {%s rule.Name %}<span class="ms-2 badge {% if rule.Health!="ok" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s rule.Health %}</span></div>
|
||||
<div class="display-6 pb-3 mb-3">Rule: {%s rule.Name %}<span class="ms-2 badge {% if rule.Health!="ok" %}bg-danger{% else %} bg-success text-dark{% endif %}">{%s rule.Health %}</span></div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
@@ -427,6 +469,16 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Debug
|
||||
</div>
|
||||
<div class="col">
|
||||
{%v rule.Debug %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
@@ -440,12 +492,26 @@
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div class="display-6 pb-3">Last {%d len(rule.Updates) %} updates</span>:</div>
|
||||
{% if seriesFetchedWarning %}
|
||||
<div class="alert alert-warning" role="alert">
|
||||
<strong>Warning:</strong> some of updates have "Series fetched" equal to 0.<br>
|
||||
It might be that either this data is missing in the datasource or there is a typo in rule's expression.
|
||||
For example, <strong>foo{label="bar"} > 0</strong> could never trigger because <strong>foo{label="bar"}</strong>
|
||||
metric doesn't exist.
|
||||
<br>
|
||||
Rule's expressions without time series selector, like <strong>expr: 42</strong> or <strong>expr: time()</strong>
|
||||
aren't fetching time series from datasource, so they could have "Series fetched" equal to 0 and this won't be a problem.
|
||||
<br>
|
||||
See more details about this detection <a target="_blank" href="https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039">here</a>.
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="display-6 pb-3">Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates</span>:</div>
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many samples were returned">Samples</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
@@ -458,7 +524,8 @@
|
||||
<td>
|
||||
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.time.Format(time.RFC3339) %}</span>
|
||||
</td>
|
||||
<td class="text-center" wi>{%d u.samples %}</td>
|
||||
<td class="text-center">{%d u.samples %}</td>
|
||||
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.seriesFetched != nil %}{%d *u.seriesFetched %}{% endif %}</td>{% endif %}
|
||||
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
|
||||
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
|
||||
<td>
|
||||
@@ -468,7 +535,7 @@
|
||||
</li>
|
||||
{% if u.err != nil %}
|
||||
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
|
||||
<td colspan="5">
|
||||
<td colspan="{% if seriesFetchedEnabled %}6{%else%}5{%endif%}">
|
||||
<span class="alert-danger">{%v u.err %}</span>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -493,3 +560,22 @@
|
||||
{% func badgeRestored() %}
|
||||
<span class="badge bg-warning text-dark" title="Alert state was restored after the service restart from remote storage">restored</span>
|
||||
{% endfunc %}
|
||||
|
||||
{% func seriesFetchedWarn(r APIRule) %}
|
||||
{% if isNoMatch(r) %}
|
||||
<svg xmlns="http://www.w3.org/2000/svg"
|
||||
data-bs-toggle="tooltip"
|
||||
title="No match! This rule last evaluation hasn't selected any time series from the datasource.
|
||||
It might be that either this data is missing in the datasource or there is a typo in rule's expression.
|
||||
See more in Details."
|
||||
width="18" height="18" fill="currentColor" class="bi bi-exclamation-triangle-fill flex-shrink-0 me-2" viewBox="0 0 16 16" role="img" aria-label="Warning:">
|
||||
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
|
||||
</svg>
|
||||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{%code
|
||||
func isNoMatch (r APIRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
%}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,6 +7,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
@@ -17,11 +18,20 @@ func TestHandler(t *testing.T) {
|
||||
alerts: map[uint64]*notifier.Alert{
|
||||
0: {State: notifier.StateFiring},
|
||||
},
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
ar.state.add(ruleStateEntry{
|
||||
time: time.Now(),
|
||||
at: time.Now(),
|
||||
samples: 10,
|
||||
})
|
||||
rr := &RecordingRule{
|
||||
Name: "record",
|
||||
state: newRuleState(10),
|
||||
}
|
||||
g := &Group{
|
||||
Name: "group",
|
||||
Rules: []Rule{ar},
|
||||
Rules: []Rule{ar, rr},
|
||||
}
|
||||
m := &manager{groups: make(map[uint64]*Group)}
|
||||
m.groups[0] = g
|
||||
@@ -62,6 +72,14 @@ func TestHandler(t *testing.T) {
|
||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||
a := ar.ToAPI()
|
||||
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||
r := rr.ToAPI()
|
||||
getResp(ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
||||
})
|
||||
t.Run("/vmalert/alert", func(t *testing.T) {
|
||||
alerts := ar.AlertsToAPI()
|
||||
for _, a := range alerts {
|
||||
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||
}
|
||||
})
|
||||
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
||||
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
|
||||
@@ -144,5 +162,59 @@ func TestHandler(t *testing.T) {
|
||||
t.Run("/api/v1/1/0/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/1/0/status", nil, 404)
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestEmptyResponse(t *testing.T) {
|
||||
rh := &requestHandler{m: &manager{groups: make(map[uint64]*Group)}}
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rh.handler(w, r) }))
|
||||
defer ts.Close()
|
||||
|
||||
getResp := func(url string, to interface{}, code int) {
|
||||
t.Helper()
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err %s", err)
|
||||
}
|
||||
if code != resp.StatusCode {
|
||||
t.Errorf("unexpected status code %d want %d", resp.StatusCode, code)
|
||||
}
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
t.Errorf("err closing body %s", err)
|
||||
}
|
||||
}()
|
||||
if to != nil {
|
||||
if err = json.NewDecoder(resp.Body).Decode(to); err != nil {
|
||||
t.Errorf("unexpected err %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t.Run("/api/v1/alerts", func(t *testing.T) {
|
||||
lr := listAlertsResponse{}
|
||||
getResp(ts.URL+"/api/v1/alerts", &lr, 200)
|
||||
if lr.Data.Alerts == nil {
|
||||
t.Errorf("expected /api/v1/alerts response to have non-nil data")
|
||||
}
|
||||
|
||||
lr = listAlertsResponse{}
|
||||
getResp(ts.URL+"/vmalert/api/v1/alerts", &lr, 200)
|
||||
if lr.Data.Alerts == nil {
|
||||
t.Errorf("expected /api/v1/alerts response to have non-nil data")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules", func(t *testing.T) {
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules", &lr, 200)
|
||||
if lr.Data.Groups == nil {
|
||||
t.Errorf("expected /api/v1/rules response to have non-nil data")
|
||||
}
|
||||
|
||||
lr = listGroupsResponse{}
|
||||
getResp(ts.URL+"/vmalert/api/v1/rules", &lr, 200)
|
||||
if lr.Data.Groups == nil {
|
||||
t.Errorf("expected /api/v1/rules response to have non-nil data")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user