mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-17 08:36:55 +03:00
Compare commits
433 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d36dbcfa9 | ||
|
|
8cfe4064b5 | ||
|
|
b4bf8dc2f0 | ||
|
|
e1c3267e34 | ||
|
|
c33cc4322c | ||
|
|
6a88d5402d | ||
|
|
dac21d874b | ||
|
|
87aeeec3e8 | ||
|
|
d8eaa511b0 | ||
|
|
a2340e6c95 | ||
|
|
c6ad3692ad | ||
|
|
43e104a83f | ||
|
|
c87c7d1e29 | ||
|
|
8b7a828c65 | ||
|
|
d4fc0ed874 | ||
|
|
a02cf92fd1 | ||
|
|
c734416f86 | ||
|
|
b285207aa7 | ||
|
|
c080443fef | ||
|
|
e688121de8 | ||
|
|
24915fd4bc | ||
|
|
637043a40e | ||
|
|
6d019a3c37 | ||
|
|
510f78a96b | ||
|
|
9dec3c8f80 | ||
|
|
aaef0bac00 | ||
|
|
fc720a5a78 | ||
|
|
383bf9689e | ||
|
|
9fbd45a22f | ||
|
|
eb08579452 | ||
|
|
4f649a0573 | ||
|
|
f8811411fd | ||
|
|
b6845951a5 | ||
|
|
d2b92d3264 | ||
|
|
84c82e988d | ||
|
|
317fef95f9 | ||
|
|
110c3896e7 | ||
|
|
57801660ab | ||
|
|
6b4ccc17c2 | ||
|
|
10cf6c9781 | ||
|
|
836d56876a | ||
|
|
d59dc7616d | ||
|
|
ffebc20f6d | ||
|
|
f04ec714c2 | ||
|
|
5446ce0018 | ||
|
|
21546e6922 | ||
|
|
7be8a8a37b | ||
|
|
6c38702212 | ||
|
|
ef1ef0598b | ||
|
|
9d91d8fc91 | ||
|
|
f4f1f2f976 | ||
|
|
a678cbe62e | ||
|
|
76f2c70be3 | ||
|
|
74f122294d | ||
|
|
d5171c155c | ||
|
|
2cea923ff7 | ||
|
|
c86f1f1d1b | ||
|
|
1030be91ae | ||
|
|
8bd2eee8e0 | ||
|
|
406822a16c | ||
|
|
c8467f37a9 | ||
|
|
6ef6f3a771 | ||
|
|
b58107c85a | ||
|
|
87f1ed5d87 | ||
|
|
11ce30820b | ||
|
|
5123a61be9 | ||
|
|
5c4f5b83fc | ||
|
|
ccdddf7996 | ||
|
|
9be1398b92 | ||
|
|
8830607021 | ||
|
|
a646841c07 | ||
|
|
7568658c19 | ||
|
|
af37717108 | ||
|
|
7720d403c0 | ||
|
|
fe196e0b7a | ||
|
|
f83d6d69b2 | ||
|
|
f3be9483f4 | ||
|
|
057698f7fb | ||
|
|
a645a95bd6 | ||
|
|
934646ccf7 | ||
|
|
8ea02eaa8e | ||
|
|
2e2b1ba87e | ||
|
|
9fff48c3e3 | ||
|
|
438b2e11bd | ||
|
|
e5070c0bcd | ||
|
|
c5b9f7f751 | ||
|
|
f9b3409ee3 | ||
|
|
25a9017a72 | ||
|
|
3ec8a4dc80 | ||
|
|
4156e78e11 | ||
|
|
b209d4ace0 | ||
|
|
b11bdc46be | ||
|
|
ed4492ddd5 | ||
|
|
776391917f | ||
|
|
f3625e4f3f | ||
|
|
70f8911ca7 | ||
|
|
f582f9e8ab | ||
|
|
73358571ee | ||
|
|
14c20c1843 | ||
|
|
c12eb2cc7f | ||
|
|
291c41978e | ||
|
|
d4b97b69bf | ||
|
|
035a2b5ed5 | ||
|
|
0e0095d350 | ||
|
|
a42e3e8dfb | ||
|
|
f13a255918 | ||
|
|
513707a8c7 | ||
|
|
f40661e7b7 | ||
|
|
a1432e6b0a | ||
|
|
bff18cb5dd | ||
|
|
e1063ce3c1 | ||
|
|
46a521191f | ||
|
|
8afc0aef8d | ||
|
|
114c14febf | ||
|
|
75bcf86a31 | ||
|
|
a1ee679042 | ||
|
|
a8e88e74cc | ||
|
|
c9d2934bb4 | ||
|
|
6c21b6ec09 | ||
|
|
e83f14210d | ||
|
|
6495b62866 | ||
|
|
86e47177dc | ||
|
|
146fd2eca3 | ||
|
|
67b01329a0 | ||
|
|
f2be447270 | ||
|
|
1901fbf19b | ||
|
|
3a51a3bc42 | ||
|
|
6f24fa2055 | ||
|
|
977c642934 | ||
|
|
c32d8ea29e | ||
|
|
8aa7559462 | ||
|
|
6fd10e8871 | ||
|
|
0a824d9490 | ||
|
|
e4c04b6dbe | ||
|
|
98dc968920 | ||
|
|
f63f487787 | ||
|
|
88fed0232c | ||
|
|
af1a9c5eda | ||
|
|
7440f971ab | ||
|
|
12cf8d9f69 | ||
|
|
e18f8e9413 | ||
|
|
07b7fe83c4 | ||
|
|
a2ba1f09e4 | ||
|
|
79527441ec | ||
|
|
df1e545c0e | ||
|
|
dc142867b8 | ||
|
|
da539bc286 | ||
|
|
fe736c5388 | ||
|
|
607b542222 | ||
|
|
8b9ebf625a | ||
|
|
7b87fac8e7 | ||
|
|
7b1caf1db3 | ||
|
|
9254e494f9 | ||
|
|
68985455f1 | ||
|
|
4cf37c5e70 | ||
|
|
3d331e4c5d | ||
|
|
4ecb61e247 | ||
|
|
442a9f16b4 | ||
|
|
dcc5616126 | ||
|
|
080a3e2396 | ||
|
|
ac8bc77688 | ||
|
|
1cbdcd391c | ||
|
|
0788be35eb | ||
|
|
ab57b92932 | ||
|
|
6a7faf9f22 | ||
|
|
ac14d50c18 | ||
|
|
51ad94677c | ||
|
|
bd716d1b0c | ||
|
|
06f6b76521 | ||
|
|
a0c8b86eab | ||
|
|
ff39a91147 | ||
|
|
372b1688d7 | ||
|
|
1b81d8f542 | ||
|
|
7e355080ce | ||
|
|
2afa4ae00a | ||
|
|
6342eb5523 | ||
|
|
54a0ccbaca | ||
|
|
b28cf0faa8 | ||
|
|
3251d392b5 | ||
|
|
119010f7f2 | ||
|
|
eedb294754 | ||
|
|
4250b67fe8 | ||
|
|
465c889f7f | ||
|
|
834c18a346 | ||
|
|
36941d6d75 | ||
|
|
558165521b | ||
|
|
0890adde67 | ||
|
|
28d92a2f31 | ||
|
|
cb374677a9 | ||
|
|
73256fe438 | ||
|
|
2c1419c687 | ||
|
|
465a285324 | ||
|
|
95ee86b600 | ||
|
|
28f66f0079 | ||
|
|
d655d6b047 | ||
|
|
99d49e3ceb | ||
|
|
20ad848c5d | ||
|
|
1a8875b417 | ||
|
|
c7c4786f3f | ||
|
|
a971bcc3fe | ||
|
|
0fd440cdb4 | ||
|
|
c496f06ca3 | ||
|
|
f7acdb13db | ||
|
|
1cfa183c2b | ||
|
|
3536bef36e | ||
|
|
babecd8363 | ||
|
|
393876e52a | ||
|
|
2c4e384f07 | ||
|
|
ba5a6c851c | ||
|
|
1a3a6ef907 | ||
|
|
7030429958 | ||
|
|
30e968df6d | ||
|
|
f2b40dbe9a | ||
|
|
a11dc6689a | ||
|
|
0a4d8dc777 | ||
|
|
3d1cb011b6 | ||
|
|
a7f8ce5e3d | ||
|
|
7c1daade15 | ||
|
|
f3cb412508 | ||
|
|
edc51b3119 | ||
|
|
cb5d073bc9 | ||
|
|
864c651c03 | ||
|
|
4e25bc2087 | ||
|
|
74df30456b | ||
|
|
df7b81b44d | ||
|
|
d513230d43 | ||
|
|
e8554cd1cb | ||
|
|
59b67f1cfa | ||
|
|
adeec6e369 | ||
|
|
9785b3f57f | ||
|
|
9c62391a5c | ||
|
|
59b97f26c0 | ||
|
|
19e28ce7b6 | ||
|
|
777038fe44 | ||
|
|
e867df5ef5 | ||
|
|
2ac530eb28 | ||
|
|
b8409d6600 | ||
|
|
1ac025bbc9 | ||
|
|
0c625185cb | ||
|
|
68463c9e87 | ||
|
|
d79f1b106c | ||
|
|
322d96bfe5 | ||
|
|
46b3b76d6d | ||
|
|
5cb8ce8174 | ||
|
|
fcef2ff6b2 | ||
|
|
1ffa793322 | ||
|
|
e58921aa8f | ||
|
|
c94020f7dc | ||
|
|
006af394ff | ||
|
|
289af65071 | ||
|
|
e06168f489 | ||
|
|
09d7fa2737 | ||
|
|
094ae82df5 | ||
|
|
092e2c8f2d | ||
|
|
04c408e986 | ||
|
|
cdb6d651e9 | ||
|
|
207a62a3c2 | ||
|
|
27afe7bc38 | ||
|
|
ffe6e6fe59 | ||
|
|
7fd82c0d3a | ||
|
|
f32a79189b | ||
|
|
be8fba9b6a | ||
|
|
98de06ff38 | ||
|
|
20f28eb9d6 | ||
|
|
ec7c3f45ba | ||
|
|
7067e8206c | ||
|
|
e2498af530 | ||
|
|
9f5b5708ff | ||
|
|
9fdd1a10c6 | ||
|
|
a194982117 | ||
|
|
820312a2b1 | ||
|
|
ec23ab6bc2 | ||
|
|
20af29294e | ||
|
|
b4ad3a3b4c | ||
|
|
8537533beb | ||
|
|
5c8bb029b5 | ||
|
|
13a1a7f826 | ||
|
|
83c87f822a | ||
|
|
54aec2b1ba | ||
|
|
b3a70b8284 | ||
|
|
351fc152e0 | ||
|
|
975bb8722f | ||
|
|
f73953a049 | ||
|
|
dff47c73b7 | ||
|
|
65b9dcfcca | ||
|
|
0771d57860 | ||
|
|
31fc29599f | ||
|
|
a0f9cb27f9 | ||
|
|
4faf7ea41e | ||
|
|
c449714c0a | ||
|
|
37fe04999c | ||
|
|
eda940106a | ||
|
|
28df7c2a96 | ||
|
|
2d294cca59 | ||
|
|
95ce1ba6ce | ||
|
|
4225a0bd75 | ||
|
|
0811000bb0 | ||
|
|
37c52ccaf4 | ||
|
|
cbe62f23ba | ||
|
|
53d871d0b1 | ||
|
|
b2ccdaaa2f | ||
|
|
b06e795a1e | ||
|
|
e640ff72f1 | ||
|
|
9a563a6aef | ||
|
|
30ed33fae0 | ||
|
|
645c24dc5f | ||
|
|
2f3ddd4884 | ||
|
|
26cf680468 | ||
|
|
4f0c11ee93 | ||
|
|
562d6bca08 | ||
|
|
21ee9a1fab | ||
|
|
df2a494a7c | ||
|
|
c5e0f527bc | ||
|
|
7afcca0c51 | ||
|
|
67ab49baa9 | ||
|
|
e5eca54951 | ||
|
|
c38a10e143 | ||
|
|
1f9d605988 | ||
|
|
fe0e199859 | ||
|
|
8aee209c53 | ||
|
|
28f8dc41b0 | ||
|
|
5df9fddaf2 | ||
|
|
41e00a0df7 | ||
|
|
488940502c | ||
|
|
5fe7ff24c2 | ||
|
|
ad5bfe3089 | ||
|
|
af263fe881 | ||
|
|
45f39e291e | ||
|
|
986a05e18d | ||
|
|
293e4dc77b | ||
|
|
5c4bd4f7c1 | ||
|
|
c63755c316 | ||
|
|
f299d2ca1a | ||
|
|
e7637885a6 | ||
|
|
463b957e54 | ||
|
|
f392913d00 | ||
|
|
bced9fb978 | ||
|
|
5bdd880142 | ||
|
|
9f348cf8a1 | ||
|
|
cad8553c01 | ||
|
|
1a28f0e5b3 | ||
|
|
48f371a46c | ||
|
|
043b28c725 | ||
|
|
ec2c82e800 | ||
|
|
01bc0c94ab | ||
|
|
9d1104d812 | ||
|
|
8b763175ff | ||
|
|
2ee81a5dbb | ||
|
|
185cdcd813 | ||
|
|
0dea3b71da | ||
|
|
a1076abcbf | ||
|
|
a7e29c38bc | ||
|
|
2460e0f51e | ||
|
|
0e1f0ade31 | ||
|
|
04dff34de4 | ||
|
|
66947ee5a2 | ||
|
|
9d0e1f8e68 | ||
|
|
63bf583b3c | ||
|
|
09fe346d18 | ||
|
|
59f20c1034 | ||
|
|
0ff85d00a4 | ||
|
|
fafece1af8 | ||
|
|
5bca3a5be2 | ||
|
|
fd175ad80b | ||
|
|
fa13bbc48a | ||
|
|
add2c4bf07 | ||
|
|
f33e687723 | ||
|
|
d3a1c36842 | ||
|
|
6db5c3801e | ||
|
|
189f85b24c | ||
|
|
7b264b0c23 | ||
|
|
8fe21ec707 | ||
|
|
fa842d6534 | ||
|
|
93e935bcaa | ||
|
|
0a519c93ef | ||
|
|
4b3d8eb573 | ||
|
|
05fa11b296 | ||
|
|
1794f3d46e | ||
|
|
59e1e84a92 | ||
|
|
b8bc62431a | ||
|
|
1720bddb4f | ||
|
|
2cedb3e883 | ||
|
|
83870aeb8d | ||
|
|
7af8857b68 | ||
|
|
c90752a8be | ||
|
|
0a9e1d64fe | ||
|
|
6588fcbfca | ||
|
|
3dc684634e | ||
|
|
d2f89b55b7 | ||
|
|
3d082ed6db | ||
|
|
c4229a1bba | ||
|
|
1b16118e17 | ||
|
|
7ad9fff7e5 | ||
|
|
293dda7169 | ||
|
|
ed9161a04f | ||
|
|
c612bb165e | ||
|
|
34c705988e | ||
|
|
7d9c4bebc0 | ||
|
|
b11c806d1c | ||
|
|
2a7e392bb3 | ||
|
|
0dca224ec3 | ||
|
|
8ef1fe2047 | ||
|
|
f599e1bd34 | ||
|
|
4deea604bf | ||
|
|
f6d31f5216 | ||
|
|
f6ac045933 | ||
|
|
1df87807fd | ||
|
|
0076422350 | ||
|
|
f1441a598f | ||
|
|
fa236c5a84 | ||
|
|
d3de110070 | ||
|
|
31886aef3d | ||
|
|
3300546eab | ||
|
|
0f8ffc7df9 | ||
|
|
192db0f0b1 | ||
|
|
f443fad56d | ||
|
|
77874d6055 | ||
|
|
7ca49290b6 | ||
|
|
e4e9dfb785 | ||
|
|
731d189fa9 | ||
|
|
4be4645142 | ||
|
|
4e55b67a44 | ||
|
|
8f5e822565 | ||
|
|
cad90c7ac1 | ||
|
|
a48510573e | ||
|
|
8afa7ef837 | ||
|
|
1d7b5cb83c | ||
|
|
18e55d14c6 | ||
|
|
8122191368 | ||
|
|
6bf46c7bf5 | ||
|
|
9fa3f1dc57 | ||
|
|
eabb8762ee | ||
|
|
fd53f86c84 |
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -1,46 +0,0 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
It would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Logs**
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
|
||||
|
||||
See how to set up monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
|
||||
**Version**
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
|
||||
**Used command-line flags**
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
name: Bug report
|
||||
description: Create a report to help us improve
|
||||
labels: [bug]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before filing a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
|
||||
- type: textarea
|
||||
id: describe-the-bug
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: |
|
||||
A clear and concise description of what the bug is.
|
||||
placeholder: |
|
||||
When I do `A` VictoriaMetrics does `B`. I expect it to do `C`.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: to-reproduce
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior.
|
||||
If reproducing an issue requires some specific configuration file, please paste it here.
|
||||
placeholder: |
|
||||
Steps to reproduce the behavior.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
description: |
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Logs
|
||||
description: |
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: |
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to set up monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: flags
|
||||
attributes:
|
||||
label: Used command-line flags
|
||||
description: |
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
placeholder: |
|
||||
Additional information that doesn't fit elsewhere
|
||||
validations:
|
||||
required: false
|
||||
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Ask on Slack
|
||||
url: https://slack.victoriametrics.com/
|
||||
about: You can ask for help here!
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -1,20 +0,0 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
name: Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [enhancement]
|
||||
body:
|
||||
- type: textarea
|
||||
id: describe-the-problem
|
||||
attributes:
|
||||
label: Is your feature request related to a problem? Please describe
|
||||
description: |
|
||||
A clear and concise description of what the problem is.
|
||||
placeholder: |
|
||||
Ex. I'm always frustrated when [...]
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: describe-the-solution
|
||||
attributes:
|
||||
label: Describe the solution you'd like
|
||||
description: |
|
||||
A clear and concise description of what you want to happen.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: alternative-solutions
|
||||
attributes:
|
||||
label: Describe alternatives you've considered
|
||||
description: |
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
placeholder: |
|
||||
I have tried to do `A`, but that doesn't solve the problem completely.
|
||||
I have tried to do `A` and `B`, but implementing this would be better.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: feature-additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Additional information which you consider helpful for implementing this feature.
|
||||
placeholder: |
|
||||
Add any other context or screenshots about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
2
.github/workflows/check-licenses.yml
vendored
2
.github/workflows/check-licenses.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.4
|
||||
go-version: 1.21.0
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
46
.github/workflows/codeql-analysis-js.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: "CodeQL - JS"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["javascript"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
with:
|
||||
category: "javascript"
|
||||
84
.github/workflows/codeql-analysis.yml
vendored
84
.github/workflows/codeql-analysis.yml
vendored
@@ -13,12 +13,26 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master, cluster ]
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ master, cluster ]
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: '30 18 * * 2'
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
@@ -32,45 +46,47 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'go', 'javascript' ]
|
||||
language: ["go"]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.19
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
|
||||
95
.github/workflows/main.yml
vendored
95
.github/workflows/main.yml
vendored
@@ -1,45 +1,96 @@
|
||||
name: main
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
lint:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.4
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golint
|
||||
make install-errcheck
|
||||
make install-golangci-lint
|
||||
- name: Build
|
||||
run: |
|
||||
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
make test-full
|
||||
make test-pure
|
||||
make test-full-386
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
matrix:
|
||||
scenario: ["test-full", "test-pure", "test-full-386"]
|
||||
name: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
build:
|
||||
needs: test
|
||||
name: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.20.1
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
|
||||
28
.github/workflows/nightly-build.yml
vendored
28
.github/workflows/nightly-build.yml
vendored
@@ -12,13 +12,37 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.4
|
||||
go-version: 1.20.1
|
||||
id: go
|
||||
|
||||
- name: Setup docker scan
|
||||
run: |
|
||||
mkdir -p ~/.docker/cli-plugins && \
|
||||
curl https://github.com/docker/scan-cli-plugin/releases/latest/download/docker-scan_linux_amd64 -L -s -S -o ~/.docker/cli-plugins/docker-scan &&\
|
||||
chmod +x ~/.docker/cli-plugins/docker-scan
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Publish
|
||||
|
||||
- uses: actions/cache@v3
|
||||
with:
|
||||
path: gocache-for-docker
|
||||
key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
|
||||
|
||||
- name: build & publish
|
||||
run: |
|
||||
docker scan --severity=medium --login --token "$SNYK_TOKEN" --accept-license
|
||||
LATEST_TAG=nightly PKG_TAG=nightly make publish
|
||||
env:
|
||||
SNYK_TOKEN: ${{ secrets.SNYK_AUTH_TOKEN }}
|
||||
|
||||
15
.golangci.yml
Normal file
15
.golangci.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
run:
|
||||
timeout: 2m
|
||||
|
||||
enable:
|
||||
- revive
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- linters:
|
||||
- staticcheck
|
||||
text: "SA(4003|1019|5011):"
|
||||
|
||||
linters-settings:
|
||||
errcheck:
|
||||
exclude: ./errcheck_excludes.txt
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2022 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
39
Makefile
39
Makefile
@@ -144,6 +144,7 @@ vmutils-windows-amd64: \
|
||||
vmctl-windows-amd64
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
victoria-metrics-linux-amd64 \
|
||||
victoria-metrics-linux-arm64 \
|
||||
victoria-metrics-linux-arm \
|
||||
@@ -155,6 +156,7 @@ victoria-metrics-crossbuild: \
|
||||
victoria-metrics-openbsd-amd64
|
||||
|
||||
vmutils-crossbuild: \
|
||||
vmutils-linux-386 \
|
||||
vmutils-linux-amd64 \
|
||||
vmutils-linux-arm64 \
|
||||
vmutils-linux-arm \
|
||||
@@ -167,16 +169,17 @@ vmutils-crossbuild: \
|
||||
vmutils-windows-amd64
|
||||
|
||||
publish-release:
|
||||
git checkout $(TAG) && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
release-vmutils
|
||||
|
||||
release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-386 \
|
||||
release-victoria-metrics-linux-amd64 \
|
||||
release-victoria-metrics-linux-arm \
|
||||
release-victoria-metrics-linux-arm64 \
|
||||
@@ -185,6 +188,10 @@ release-victoria-metrics: \
|
||||
release-victoria-metrics-freebsd-amd64 \
|
||||
release-victoria-metrics-openbsd-amd64
|
||||
|
||||
# adds i386 arch
|
||||
release-victoria-metrics-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
@@ -216,6 +223,7 @@ release-victoria-metrics-goos-goarch: victoria-metrics-$(GOOS)-$(GOARCH)-prod
|
||||
cd bin && rm -rf victoria-metrics-$(GOOS)-$(GOARCH)-prod
|
||||
|
||||
release-vmutils: \
|
||||
release-vmutils-linux-386 \
|
||||
release-vmutils-linux-amd64 \
|
||||
release-vmutils-linux-arm64 \
|
||||
release-vmutils-linux-arm \
|
||||
@@ -225,6 +233,9 @@ release-vmutils: \
|
||||
release-vmutils-openbsd-amd64 \
|
||||
release-vmutils-windows-amd64
|
||||
|
||||
release-vmutils-linux-386:
|
||||
GOOS=linux GOARCH=386 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
@@ -315,21 +326,7 @@ vet:
|
||||
go vet ./lib/...
|
||||
go vet ./app/...
|
||||
|
||||
lint: install-golint
|
||||
golint lib/...
|
||||
golint app/...
|
||||
|
||||
install-golint:
|
||||
which golint || go install golang.org/x/lint/golint@latest
|
||||
|
||||
errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./lib/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/...
|
||||
|
||||
install-errcheck:
|
||||
which errcheck || go install github.com/kisielk/errcheck@latest
|
||||
|
||||
check-all: fmt vet lint errcheck golangci-lint govulncheck
|
||||
check-all: fmt vet golangci-lint govulncheck
|
||||
|
||||
test:
|
||||
go test ./lib/... ./app/...
|
||||
@@ -380,10 +377,10 @@ install-qtc:
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
|
||||
golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.48.0
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.51.1
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
|
||||
180
README.md
180
README.md
@@ -40,19 +40,36 @@ VictoriaMetrics has the following prominent features:
|
||||
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
|
||||
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
|
||||
* It features easy setup and operation:
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
|
||||
without external dependencies.
|
||||
* All the configuration is done via explicit command-line flags with reasonable defaults.
|
||||
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) to S3 or GCS can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools. See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-based query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
|
||||
* It provides high performance and good vertical and horizontal scalability for both [data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f) when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It provides high performance and good vertical and horizontal scalability for both
|
||||
[data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
|
||||
and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
|
||||
It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
|
||||
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
|
||||
when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
|
||||
* It provides high data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) may be crammed into limited storage comparing to TimescaleDB and [up to 7x less storage space is required compared to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae), [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It provides high data compression, so up to 70x more data points may be stored into limited storage compared to TimescaleDB
|
||||
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
|
||||
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex
|
||||
according to [this benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
|
||||
See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
|
||||
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
|
||||
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
|
||||
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
|
||||
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to
|
||||
[the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
|
||||
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* [Prometheus remote write API](#prometheus-setup).
|
||||
@@ -65,11 +82,15 @@ VictoriaMetrics has the following prominent features:
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data
|
||||
and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/) and [Google Filestore](https://cloud.google.com/filestore).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/)
|
||||
and [Google Filestore](https://cloud.google.com/filestore).
|
||||
|
||||
See also [various Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
|
||||
|
||||
@@ -84,7 +105,7 @@ Case studies:
|
||||
* [Brandwatch](https://docs.victoriametrics.com/CaseStudies.html#brandwatch)
|
||||
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
|
||||
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
|
||||
* [Dreamteam](https://docs.victoriametrics.com/CaseStudies.html#dreamteam)
|
||||
* [Dig Security](https://docs.victoriametrics.com/CaseStudies.html#dig-security)
|
||||
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
|
||||
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
|
||||
* [Grammarly](https://docs.victoriametrics.com/CaseStudies.html#grammarly)
|
||||
@@ -271,6 +292,8 @@ Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](
|
||||
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`.
|
||||
The UI allows exploring query results via graphs and tables.
|
||||
It also provides the following features:
|
||||
|
||||
- [metrics explorer](#metrics-explorer)
|
||||
- [cardinality explorer](#cardinality-explorer)
|
||||
- [query tracer](#query-tracing)
|
||||
- [top queries explorer](#top-queries)
|
||||
@@ -306,9 +329,21 @@ See the [example VMUI at VictoriaMetrics playground](https://play.victoriametric
|
||||
* queries with the biggest average execution duration;
|
||||
* queries that took the most summary time for execution.
|
||||
|
||||
## Metrics explorer
|
||||
|
||||
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
|
||||
|
||||
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
|
||||
2. Click the `Explore metrics` tab.
|
||||
3. Select the `job` you want to explore.
|
||||
4. Optionally select the `instance` for the selected job to explore.
|
||||
5. Select metrics you want to explore and compare.
|
||||
|
||||
It is possible to change the selected time range for the graphs in the top right corner.
|
||||
|
||||
## Cardinality explorer
|
||||
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `Explore cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
|
||||
- To identify metric names with the highest number of series.
|
||||
- To identify labels with the highest number of series.
|
||||
@@ -363,7 +398,7 @@ DataDog agent allows configuring destinations for metrics sending via ENV variab
|
||||
or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.
|
||||
|
||||
<p align="center">
|
||||
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
</p>
|
||||
|
||||
To configure DataDog agent via ENV variable add the following prefix:
|
||||
@@ -397,7 +432,7 @@ DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guid
|
||||
sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.
|
||||
|
||||
<p align="center">
|
||||
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
</p>
|
||||
|
||||
Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
|
||||
@@ -701,8 +736,7 @@ VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query
|
||||
VictoriaMetrics accepts optional `extra_filters[]=series_selector` query arg, which can be used for enforcing arbitrary label filters for queries. For example,
|
||||
`/api/v1/query_range?extra_filters[]={env=~"prod|staging",user="xyz"}&query=<query>` would automatically add `{env=~"prod|staging",user="xyz"}` label filters to the given `<query>`. This functionality can be used for limiting the scope of time series visible to the given tenant. It is expected that the `extra_filters[]` query args are automatically set by auth proxy sitting in front of VictoriaMetrics. See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
|
||||
VictoriaMetrics accepts relative times in `time`, `start` and `end` query args additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt).
|
||||
For example, the following query would return data for the last 30 minutes: `/api/v1/query_range?start=-30m&query=...`.
|
||||
VictoriaMetrics accepts multiple formats for `time`, `start` and `end` query args - see [these docs](#timestamp-formats).
|
||||
|
||||
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
|
||||
@@ -727,6 +761,18 @@ Additionally, VictoriaMetrics provides the following handlers:
|
||||
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
|
||||
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
|
||||
|
||||
### Timestamp formats
|
||||
|
||||
VictoriaMetrics accepts the following formats for `time`, `start` and `end` query args
|
||||
in [query APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and
|
||||
in [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series).
|
||||
|
||||
- Unix timestamps in seconds with optional milliseconds after the point. For example, `1562529662.678`.
|
||||
- [RFC3339](https://www.ietf.org/rfc/rfc3339.txt). For example, `2022-03-29T01:02:03Z`.
|
||||
- Partial RFC3339. Examples: `2022`, `2022-03`, `2022-03-29`, `2022-03-29T01`, `2022-03-29T01:02`.
|
||||
- Relative duration compared to the current time. For example, `1h5m` means `one hour and five minutes ago`.
|
||||
|
||||
|
||||
## Graphite API usage
|
||||
|
||||
VictoriaMetrics supports data ingestion in Graphite protocol - see [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details.
|
||||
@@ -946,12 +992,13 @@ Each JSON line contains samples for a single time series. An example output:
|
||||
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
Optional `max_rows_per_line` arg may be added to the request for limiting the maximum number of rows exported per each JSON line.
|
||||
@@ -994,12 +1041,13 @@ where:
|
||||
* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to export.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
||||
@@ -1021,12 +1069,13 @@ wget -O- -q 'http://your_victoriametrics_instance:8428/api/v1/series/count' | jq
|
||||
# relaunch victoriametrics with search.maxExportSeries more than value from previous command
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
|
||||
The exported data can be imported to VictoriaMetrics via [/api/v1/import/native](#how-to-import-data-in-native-format).
|
||||
@@ -1037,7 +1086,9 @@ The [deduplication](#deduplication) isn't applied for the data exported in nativ
|
||||
|
||||
## How to import time series data
|
||||
|
||||
Time series data can be imported into VictoriaMetrics via any supported data ingestion protocol:
|
||||
VictoriaMetrics can discover and scrape metrics from Prometheus-compatible targets (aka "pull" protocol) -
|
||||
see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
Additionally, VictoriaMetrics can accept metrics via the following popular data ingestion protocols (aka "push" protocols):
|
||||
|
||||
* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). See [these docs](#prometheus-setup) for details.
|
||||
* DataDog `submit metrics` API. See [these docs](#how-to-send-data-from-datadog-agent) for details.
|
||||
@@ -1258,11 +1309,12 @@ VictoriaMetrics exports [Prometheus-compatible federation data](https://promethe
|
||||
at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
|
||||
`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48+00:00' -d 'end=2022-06-06T19:29:07+00:00'
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=2022-06-06T19:25:48' -d 'end=2022-06-06T19:29:07'
|
||||
```
|
||||
By default, the last point on the interval `[now - max_lookback ... now]` is scraped for each time series. The default value for `max_lookback` is `5m` (5 minutes), but it can be overridden with `max_lookback` query arg.
|
||||
For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
|
||||
@@ -1305,6 +1357,7 @@ By default VictoriaMetrics is tuned for an optimal resource usage under typical
|
||||
- `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
|
||||
- `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
- `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
|
||||
- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
|
||||
- `-search.maxSeries` limits the number of time series, which may be returned from [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxSeries` to quite low value in order to limit CPU and memory usage.
|
||||
- `-search.maxTagKeys` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage.
|
||||
- `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
|
||||
@@ -1426,8 +1479,8 @@ This increases overhead during data querying, since VictoriaMetrics needs to rea
|
||||
bigger number of parts per each request. That's why it is recommended to have at least 20%
|
||||
of free disk space under directory pointed by `-storageDataPath` command-line flag.
|
||||
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176).
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/).
|
||||
See more details in [monitoring docs](#monitoring).
|
||||
|
||||
See [this article](https://valyala.medium.com/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for more details.
|
||||
@@ -1565,7 +1618,9 @@ VictoriaMetrics provides the following security-related command-line flags:
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
|
||||
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
|
||||
See also [security recommendation for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#security)
|
||||
and [the general security page at VictoriaMetrics website](https://victoriametrics.com/security/).
|
||||
|
||||
|
||||
## Tuning
|
||||
|
||||
@@ -1594,8 +1649,8 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
|
||||
set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
|
||||
with 10 seconds interval.
|
||||
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/dashboards/10229)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176) VictoriaMetrics.
|
||||
Official Grafana dashboards are available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
|
||||
See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
|
||||
created by community.
|
||||
|
||||
@@ -1844,8 +1899,8 @@ The following metrics for each type of cache are exported at [`/metrics` page](#
|
||||
* `vm_cache_misses_total` - the number of cache misses
|
||||
* `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
then cache efficiency is already very high and does not need any tuning.
|
||||
@@ -2121,7 +2176,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
|
||||
-graphiteListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
@@ -2141,7 +2198,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8428")
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8428")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
@@ -2154,7 +2213,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-influxDBLabel string
|
||||
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write . See also -influxListenAddr.useProxyProtocol
|
||||
-influxListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
@@ -2166,7 +2227,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-inmemoryDataFlushInterval duration
|
||||
The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
|
||||
-logNewSeries
|
||||
Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
|
||||
-loggerDisableTimestamps
|
||||
@@ -2175,6 +2238,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -2184,7 +2249,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
@@ -2200,9 +2265,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
|
||||
-opentsdbListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
@@ -2270,6 +2339,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
Interval for checking for changes in kuma service discovery. This works only if kuma_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kuma_sd_configs for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxResponseHeadersSize size
|
||||
@@ -2283,6 +2354,10 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
|
||||
-promscrape.noStaleMarkers
|
||||
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
|
||||
-promscrape.nomad.waitTime duration
|
||||
Wait time used by Nomad service discovery. Default value is used if not set
|
||||
-promscrape.nomadSDCheckInterval duration
|
||||
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
|
||||
-promscrape.seriesLimitPerTarget int
|
||||
@@ -2327,8 +2402,11 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 10s)
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logQueryMemoryUsage size
|
||||
Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging (default 5s)
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
-search.maxConcurrentRequests int
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 8)
|
||||
-search.maxExportDuration duration
|
||||
@@ -2342,7 +2420,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.maxLookback duration
|
||||
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaning due to historical reasons
|
||||
-search.maxMemoryPerQuery size
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . See also -search.logQueryMemoryUsage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.maxPointsPerTimeseries int
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
|
||||
@@ -2361,6 +2439,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage (default 30000000)
|
||||
-search.maxSeries int
|
||||
The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 30000)
|
||||
-search.maxSeriesPerAggrFunc int
|
||||
The maximum number of time series an aggregate MetricsQL function can generate (default 1000000)
|
||||
-search.maxStalenessInterval duration
|
||||
The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag
|
||||
-search.maxStatusRequestDuration duration
|
||||
@@ -2427,6 +2507,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)
|
||||
-storageDataPath string
|
||||
Path to storage data (default "victoria-metrics-data")
|
||||
-streamAggr.config string
|
||||
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -streamAggr.dedupInterval
|
||||
-streamAggr.dedupInterval duration
|
||||
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
|
||||
-streamAggr.keepInput
|
||||
Whether to keep input samples after the aggregation with -streamAggr.config. By default the input is dropped after the aggregation, so only the aggregate data is stored. See https://docs.victoriametrics.com/stream-aggregation.html
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
@@ -2444,4 +2530,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Show VictoriaMetrics version
|
||||
-vmalert.proxyURL string
|
||||
Optional URL for proxying requests to vmalert. For example, if -vmalert.proxyURL=http://vmalert:8880 , then alerting API requests such as /api/v1/rules from Grafana will be proxied to http://vmalert:8880/api/v1/rules
|
||||
-vmui.customDashboardsPath string
|
||||
Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
|
||||
```
|
||||
|
||||
@@ -24,7 +24,9 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only -promscrape.config and then exit. "+
|
||||
@@ -64,7 +66,7 @@ func main() {
|
||||
vminsert.Init()
|
||||
startSelfScraper()
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
@@ -90,7 +92,7 @@ func main() {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
|
||||
@@ -129,10 +129,10 @@ func setUp() {
|
||||
storagePath = filepath.Join(os.TempDir(), testStorageSuffix)
|
||||
processFlags()
|
||||
logger.Init()
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, false, requestHandler)
|
||||
readyStorageCheckFunc := func() bool {
|
||||
resp, err := http.Get(testHealthHTTPPath)
|
||||
if err != nil {
|
||||
@@ -189,10 +189,8 @@ func tearDown() {
|
||||
|
||||
func TestWriteRead(t *testing.T) {
|
||||
t.Run("write", testWrite)
|
||||
vmstorage.Storage.DebugFlush()
|
||||
time.Sleep(1 * time.Second)
|
||||
vmstorage.Stop()
|
||||
// open storage after stop in write
|
||||
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
|
||||
t.Run("read", testRead)
|
||||
}
|
||||
|
||||
@@ -261,7 +259,7 @@ func testRead(t *testing.T) {
|
||||
for _, q := range test.Query {
|
||||
q = testutil.PopulateTimeTplString(q, insertionTime)
|
||||
if test.Issue != "" {
|
||||
test.Issue = "Regression in " + test.Issue
|
||||
test.Issue = "\nRegression in " + test.Issue
|
||||
}
|
||||
switch true {
|
||||
case strings.HasPrefix(q, "/api/v1/export"):
|
||||
@@ -284,7 +282,7 @@ func testRead(t *testing.T) {
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error %s.%s", q, err, test.Issue)
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -12,7 +12,7 @@ func TestPopulateTimeTplString(t *testing.T) {
|
||||
}
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
result := PopulateTimeTplString(s, now)
|
||||
result := PopulateTimeTplString(s, now.UTC())
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"forms_daily_count;item=x 2 {TIME_S-2m}",
|
||||
"forms_daily_count;item=y 3 {TIME_S-1m}",
|
||||
"forms_daily_count;item=y 4 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","2"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","4"]}]}
|
||||
|
||||
@@ -24,8 +24,10 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
||||
see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Can accept data via all the ingestion protocols supported by VictoriaMetrics - see [these docs](#how-to-push-data-to-vmagent).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems -
|
||||
see [these docs](#replication-and-high-availability).
|
||||
* Can aggregate incoming samples by time and by labels before sending them to remote storage - see [these docs](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
* Can replicate collected metrics simultaneously to multiple Prometheus-compatible remote storage systems - see [these docs](#replication-and-high-availability).
|
||||
* Can save egress network bandwidth usage costs when [VictoriaMetrics remote write protocol](#victoriametrics-remote-write-protocol)
|
||||
is used for sending the data to VictoriaMetrics.
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
@@ -45,14 +47,15 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
||||
|
||||
Please download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) (
|
||||
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags)),
|
||||
unpack it and pass the following flags to the `vmagent` binary in order to start scraping Prometheus-compatible targets:
|
||||
unpack it and pass the following flags to the `vmagent` binary in order to start scraping Prometheus-compatible targets
|
||||
and sending the data to the Prometheus-compatible remote storage:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (usually located at `/etc/prometheus/prometheus.yml`).
|
||||
* `-promscrape.config` with the path to [Prometheus config file](https://docs.victoriametrics.com/sd_configs.html) (usually located at `/etc/prometheus/prometheus.yml`).
|
||||
The path can point either to local file or to http url. `vmagent` doesn't support some sections of Prometheus config file,
|
||||
so you may need either to delete these sections or to run `vmagent` with `-promscrape.config.strictParse=false` command-line flag.
|
||||
In this case `vmagent` ignores unsupported sections. See [the list of unsupported sections](#unsupported-prometheus-config-sections).
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics, the `-remoteWrite.url` argument can be specified
|
||||
multiple times to replicate data concurrently to an arbitrary number of remote storage systems. See [various use cases](#use-cases).
|
||||
* `-remoteWrite.url` with Prometheus-compatible remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified
|
||||
multiple times to replicate data concurrently to multiple remote storage systems. See [various use cases](#use-cases).
|
||||
|
||||
Example command line:
|
||||
|
||||
@@ -73,6 +76,8 @@ and sending it to the provided `-remoteWrite.url`:
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
`vmagent` can save network bandwidth usage costs under high load when [VictoriaMetrics remote write protocol is enabled](#victoriametrics-remote-write-protocol).
|
||||
|
||||
See [troubleshooting docs](#troubleshooting) if you encounter common issues with `vmagent`.
|
||||
|
||||
Pass `-help` to `vmagent` in order to see [the full list of supported command-line flags with their descriptions](#advanced-usage).
|
||||
@@ -118,7 +123,8 @@ data to the remote storage. It re-tries sending the data to remote storage until
|
||||
The maximum on-disk size for the buffered metrics can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
`vmagent` works on various architectures from the IoT world - 32-bit arm, 64-bit arm, ppc64, 386, amd64.
|
||||
See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/Makefile) for details.
|
||||
|
||||
The `vmagent` can save network bandwidth usage costs by using [VictoriaMetrics remote write protocol](#victoriametrics-remote-write-protocol).
|
||||
|
||||
### Drop-in replacement for Prometheus
|
||||
|
||||
@@ -126,6 +132,12 @@ If you use Prometheus only for scraping metrics from various targets and forward
|
||||
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
### Statsd alternative
|
||||
|
||||
`vmagent` can be used as an alternative to [statsd](https://github.com/statsd/statsd)
|
||||
when [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) is enabled.
|
||||
See [these docs](https://docs.victoriametrics.com/stream-aggregation.html#statsd-alternative) for details.
|
||||
|
||||
### Flexible metrics relay
|
||||
|
||||
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](#relabeling)
|
||||
@@ -167,6 +179,28 @@ the `-remoteWrite.url` command-line flag should be configured as `<schema>://<vm
|
||||
according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format).
|
||||
There is also support for multitenant writes. See [these docs](#multitenancy).
|
||||
|
||||
## VictoriaMetrics remote write protocol
|
||||
|
||||
`vmagent` supports sending data to the configured `-remoteWrite.url` either via Prometheus remote write protocol
|
||||
or via VictoriaMetrics remote write protocol.
|
||||
|
||||
VictoriaMetrics remote write protocol provides the following benefits compared to Prometheus remote write protocol:
|
||||
|
||||
- Reduced network bandwidth usage by 2x-5x. This allows saving network bandwidth usage costs when `vmagent` and
|
||||
the configured remote storage systems are located in different datacenters, availability zones or regions.
|
||||
|
||||
- Reduced disk read/write IO and disk space usage at `vmagent` when the remote storage is temporarily unavailable.
|
||||
In this case `vmagent` buffers the incoming data to disk using the VictoriaMetrics remote write format.
|
||||
This reduces disk read/write IO and disk space usage by 2x-5x compared to Prometheus remote write format.
|
||||
|
||||
`vmagent` automatically uses VictoriaMetrics remote write protocol when it sends data to VictoriaMetrics components such as other `vmagent` instances,
|
||||
[single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
|
||||
or `vminsert` at [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
|
||||
`vmagent` automatically switches to Prometheus remote write protocol when it sends data to old versions of VictoriaMetrics components
|
||||
or to other Prometheus-compatible remote storage systems. It is possible to force switch to Prometheus remote write protocol
|
||||
by specifying `-remoteWrite.forcePromProto` command-line flag for the corresponding `-remoteWrite.url`.
|
||||
|
||||
## Multitenancy
|
||||
|
||||
By default `vmagent` collects the data without tenant identifiers and routes it to the configured `-remoteWrite.url`.
|
||||
@@ -316,7 +350,7 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
|
||||
## Automatically generated metrics
|
||||
|
||||
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format)
|
||||
and attaches target-specific `instance` and `job` labels to these metrics:
|
||||
and attaches `instance`, `job` and other target-specific labels to these metrics:
|
||||
|
||||
* `up` - this metric exposes `1` value on successful scrape and `0` value on unsuccessful scrape. This allows monitoring
|
||||
failing scrapes with the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html):
|
||||
@@ -598,8 +632,8 @@ provide the following tools for debugging target-level and metric-level relabeli
|
||||
|
||||
- Target-level debugging (e.g. `relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs))
|
||||
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
|
||||
and clicking the `debug` link at the target, which must be debugged.
|
||||
The opened page will show step-by-step results for the actual relabeling rules applied to the target labels.
|
||||
and clicking the `debug target relabeling` link at the target, which must be debugged.
|
||||
The opened page will show step-by-step results for the actual target relabeling rules applied to the discovered target labels.
|
||||
|
||||
The `http://vmagent:8429/targets` page shows only active targets. If you need to understand why some target
|
||||
is dropped during the relabeling, then navigate to `http://vmagent:8429/service-discovery` page
|
||||
@@ -608,11 +642,9 @@ provide the following tools for debugging target-level and metric-level relabeli
|
||||
which result in target drop.
|
||||
|
||||
- Metric-level debugging (e.g. `metric_relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
|
||||
and all the relabeling, which can be set up via `-relabelConfig`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`
|
||||
command-line flags) can be performed by navigating to `http://vmagent:8429/metric-relabel-debug` page
|
||||
(`http://victoriametrics:8428/metric-relabel-debug` page for single-node VictoriaMetrics)
|
||||
and submitting there relabeling rules together with the metric to be relabeled.
|
||||
The page will show step-by-step results for the entered relabeling rules executed against the entered metric.
|
||||
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
|
||||
and clicking the `debug metrics relabeling` link at the target, which must be debugged.
|
||||
The opened page will show step-by-step results for the actual metric relabeling rules applied to the given target labels.
|
||||
|
||||
## Prometheus staleness markers
|
||||
|
||||
@@ -757,7 +789,8 @@ scrape_configs:
|
||||
|
||||
## Cardinality limiter
|
||||
|
||||
By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced in the following places:
|
||||
By default `vmagent` doesn't limit the number of time series each scrape target can expose.
|
||||
The limit can be enforced in the following places:
|
||||
|
||||
* Via `-promscrape.seriesLimitPerTarget` command-line option. This limit is applied individually
|
||||
to all the scrape targets defined in the file pointed by `-promscrape.config`.
|
||||
@@ -768,10 +801,7 @@ By default `vmagent` doesn't limit the number of time series each scrape target
|
||||
via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets,
|
||||
which may expose too high number of time series.
|
||||
|
||||
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
|
||||
|
||||
Scraped metrics are dropped for time series exceeding the given limit.
|
||||
|
||||
Scraped metrics are dropped for time series exceeding the given limit on the time window of 24h.
|
||||
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
|
||||
|
||||
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
|
||||
@@ -785,6 +815,7 @@ These metrics allow building the following alerting rules:
|
||||
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
|
||||
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
|
||||
|
||||
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
|
||||
|
||||
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`.
|
||||
The limit can be enforced by setting the following command-line flags:
|
||||
@@ -816,7 +847,7 @@ See also [cardinality explorer docs](https://docs.victoriametrics.com/#cardinali
|
||||
We recommend setting up regular scraping of this page either through `vmagent` itself or by Prometheus
|
||||
so that the exported metrics may be analyzed later.
|
||||
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683-victoriametrics-vmagent/) for `vmagent` state overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
|
||||
|
||||
@@ -875,7 +906,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
The number of dropped blocks can be monitored via `vmagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
|
||||
|
||||
* Use `-remoteWrite.queues=1` when `-remoteWrite.url` points to remote storage, which doesn't accept out-of-order samples (aka data backfilling).
|
||||
Such storage systems include Prometheus, Cortex and Thanos, which typically emit `out of order sample` errors.
|
||||
Such storage systems include Prometheus, Mimir, Cortex and Thanos, which typically emit `out of order sample` errors.
|
||||
The best solution is to use remote storage with [backfilling support](https://docs.victoriametrics.com/#backfilling) such as VictoriaMetrics.
|
||||
|
||||
* `vmagent` buffers scraped data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
@@ -1156,7 +1187,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-graphiteListenAddr string
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
|
||||
-graphiteListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-graphiteTrimTimestamp duration
|
||||
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-http.connTimeout duration
|
||||
@@ -1176,7 +1209,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr='' (default ":8429")
|
||||
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol (default ":8429")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
@@ -1189,7 +1224,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-influxDBLabel string
|
||||
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
|
||||
-influxListenAddr string
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
|
||||
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . See also -influxListenAddr.useProxyProtocol
|
||||
-influxListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-influxMeasurementFieldSeparator string
|
||||
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
|
||||
-influxSkipMeasurement
|
||||
@@ -1199,7 +1236,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
|
||||
-kafka.consumer.topic array
|
||||
Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1232,6 +1271,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -1241,7 +1282,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
@@ -1253,9 +1294,13 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbListenAddr string
|
||||
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
|
||||
TCP and UDP address to listen for OpenTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
|
||||
-opentsdbListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-opentsdbTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
@@ -1321,6 +1366,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
Interval for checking for changes in kuma service discovery. This works only if kuma_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#kuma_sd_configs for details (default 30s)
|
||||
-promscrape.maxDroppedTargets int
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxResponseHeadersSize size
|
||||
@@ -1334,6 +1381,10 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
|
||||
-promscrape.noStaleMarkers
|
||||
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
|
||||
-promscrape.nomad.waitTime duration
|
||||
Wait time used by Nomad service discovery. Default value is used if not set
|
||||
-promscrape.nomadSDCheckInterval duration
|
||||
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
|
||||
-promscrape.seriesLimitPerTarget int
|
||||
@@ -1397,6 +1448,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.flushInterval duration
|
||||
Interval for flushing the data to remote storage. This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url (default 1s)
|
||||
-remoteWrite.forcePromProto array
|
||||
Whether to force Prometheus remote write protocol for sending data to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.headers array
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1443,7 +1497,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.relabelConfig string
|
||||
Optional path to file with relabel_config entries. The path can point either to local file or to http url. These entries are applied to all the metrics before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details
|
||||
Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
|
||||
-remoteWrite.roundDigits array
|
||||
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
@@ -1455,6 +1509,15 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-remoteWrite.significantFigures array
|
||||
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.config array
|
||||
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.dedupInterval array
|
||||
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.streamAggr.keepInput array
|
||||
Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. See https://docs.victoriametrics.com/stream-aggregation.html
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default system CA is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1473,10 +1536,10 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-remoteWrite.tmpDataPath string
|
||||
Path to directory where temporary data for remote write component is stored. See also -remoteWrite.maxDiskUsagePerURL (default "vmagent-remotewrite-data")
|
||||
-remoteWrite.url array
|
||||
Remote storage URL to write data to. It must support Prometheus remote_write API. It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL
|
||||
Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. See also -remoteWrite.multitenantURL
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.urlRelabelConfig array
|
||||
Optional path to relabel config for the corresponding -remoteWrite.url. The path can point either to local file or to http url
|
||||
Optional path to relabel configs for the corresponding -remoteWrite.url. See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -26,10 +26,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
return stream.Parse(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -28,11 +28,9 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -20,9 +20,7 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
return stream.Parse(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -37,10 +37,8 @@ var (
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -52,15 +50,13 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return stream.Parse(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -38,23 +38,35 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . "+
|
||||
"See also -influxListenAddr.useProxyProtocol")
|
||||
influxUseProxyProtocol = flag.Bool("influxListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -influxListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. "+
|
||||
"See also -graphiteListenAddr.useProxyProtocol")
|
||||
graphiteUseProxyProtocol = flag.Bool("graphiteListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -graphiteListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol")
|
||||
opentsdbUseProxyProtocol = flag.Bool("opentsdbListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
|
||||
)
|
||||
@@ -104,28 +116,27 @@ func main() {
|
||||
startTime := time.Now()
|
||||
remotewrite.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
writeconcurrencylimiter.Init()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
})
|
||||
}
|
||||
if len(*graphiteListenAddr) > 0 {
|
||||
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
|
||||
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, *graphiteUseProxyProtocol, graphite.InsertHandler)
|
||||
}
|
||||
if len(*opentsdbListenAddr) > 0 {
|
||||
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
|
||||
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, httpInsertHandler)
|
||||
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, *opentsdbUseProxyProtocol, opentsdb.InsertHandler, httpInsertHandler)
|
||||
}
|
||||
if len(*opentsdbHTTPListenAddr) > 0 {
|
||||
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, httpInsertHandler)
|
||||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(remotewrite.Push)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
}
|
||||
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
@@ -197,7 +208,7 @@ func getAuthTokenFromPath(path string) (*auth.Token, error) {
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
@@ -225,7 +236,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
statusCode := http.StatusNoContent
|
||||
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus/metrics/job/") ||
|
||||
strings.HasPrefix(path, "/api/v1/import/prometheus/metrics/job/") {
|
||||
// Return 200 status code for pushgateway requests.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
|
||||
statusCode = http.StatusOK
|
||||
}
|
||||
w.WriteHeader(statusCode)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "datadog/") {
|
||||
@@ -235,6 +253,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
switch path {
|
||||
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||
if common.HandleVMProtoServerHandshake(w, r) {
|
||||
return true
|
||||
}
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(nil, r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
@@ -349,12 +370,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/prometheus/config", "/config":
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeConfigRequests.Inc()
|
||||
@@ -363,12 +379,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
return true
|
||||
}
|
||||
promscrapeStatusConfigRequests.Inc()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -10,9 +10,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,14 +30,12 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
return err
|
||||
}
|
||||
isGzip := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
return stream.Parse(req.Body, isGzip, func(block *stream.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, block *parser.Block, extraLabels []prompbmarshal.Label) error {
|
||||
func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -20,9 +20,7 @@ var (
|
||||
//
|
||||
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
return stream.Parse(r, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp/stream"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -25,10 +25,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
return stream.Parse(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,20 +31,11 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader with optional gzip format
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
}, nil)
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return stream.Parse(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, func(s string) {
|
||||
httpserver.LogError(req, s)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
60
app/vmagent/prometheusimport/request_handler_test.go
Normal file
60
app/vmagent/prometheusimport/request_handler_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
)
|
||||
|
||||
var (
|
||||
srv *httptest.Server
|
||||
testOutput *bytes.Buffer
|
||||
)
|
||||
|
||||
func TestInsertHandler(t *testing.T) {
|
||||
setUp()
|
||||
defer tearDown()
|
||||
req := httptest.NewRequest(http.MethodPost, "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
|
||||
go_memstats_alloc_bytes_total 1`))
|
||||
if err := InsertHandler(nil, req); err != nil {
|
||||
t.Errorf("unxepected error %s", err)
|
||||
}
|
||||
expectedMsg := "cannot unmarshal Prometheus line"
|
||||
if !strings.Contains(testOutput.String(), expectedMsg) {
|
||||
t.Errorf("output %q should contain %q", testOutput.String(), expectedMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func setUp() {
|
||||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(204)
|
||||
}))
|
||||
flag.Parse()
|
||||
remoteWriteFlag := "remoteWrite.url"
|
||||
if err := flag.Lookup(remoteWriteFlag).Value.Set(srv.URL); err != nil {
|
||||
log.Fatalf("unable to set %q with value %q, err: %v", remoteWriteFlag, srv.URL, err)
|
||||
}
|
||||
logger.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
remotewrite.Init()
|
||||
testOutput = &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
}
|
||||
|
||||
func tearDown() {
|
||||
common.StopUnmarshalWorkers()
|
||||
srv.Close()
|
||||
logger.ResetOutputForTest()
|
||||
tmpDataDir := flag.Lookup("remoteWrite.tmpDataPath").Value.String()
|
||||
fs.MustRemoveAll(tmpDataDir)
|
||||
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
package promremotewrite
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
@@ -11,9 +10,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -29,19 +27,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, nil)
|
||||
})
|
||||
isVMRemoteWrite := req.Header.Get("Content-Encoding") == "zstd"
|
||||
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -67,10 +67,11 @@ var (
|
||||
)
|
||||
|
||||
type client struct {
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
isVMRemoteWrite bool
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
@@ -92,7 +93,7 @@ type client struct {
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int, isVMRemoteWrite bool) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
@@ -122,17 +123,19 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(pu)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
awsCfg: awsCfg,
|
||||
fq: fq,
|
||||
hc: &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
isVMRemoteWrite: isVMRemoteWrite,
|
||||
authCfg: authCfg,
|
||||
awsCfg: awsCfg,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
return c
|
||||
@@ -291,7 +294,9 @@ func (c *client) runWorker() {
|
||||
}
|
||||
}
|
||||
|
||||
// sendBlockHTTP returns false only if c.stopCh is closed.
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
//
|
||||
// The function returns false only if c.stopCh is closed.
|
||||
// Otherwise it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.register(len(block), c.stopCh)
|
||||
@@ -305,7 +310,7 @@ func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
}
|
||||
|
||||
again:
|
||||
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
req, err := http.NewRequest(http.MethodPost, c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.sanitizedURL, err)
|
||||
}
|
||||
@@ -313,8 +318,13 @@ again:
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
if c.isVMRemoteWrite {
|
||||
h.Set("Content-Encoding", "zstd")
|
||||
h.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
|
||||
} else {
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
}
|
||||
if c.awsCfg != nil {
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -33,9 +34,10 @@ type pendingSeries struct {
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte), significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
@@ -88,6 +90,9 @@ type writeRequest struct {
|
||||
// pushBlock is called when the write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
significantFigures int
|
||||
|
||||
@@ -104,7 +109,7 @@ type writeRequest struct {
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset pushBlock, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
@@ -126,7 +131,7 @@ func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
@@ -188,7 +193,7 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
wr.buf = buf
|
||||
}
|
||||
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte)) {
|
||||
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
|
||||
if len(wr.Timeseries) == 0 {
|
||||
// Nothing to push
|
||||
return
|
||||
@@ -197,7 +202,11 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
zb := snappyBufPool.Get()
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
if isVMRemoteWrite {
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, 0)
|
||||
} else {
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
if len(zb.B) <= persistentqueue.MaxBlockSize {
|
||||
pushBlock(zb.B)
|
||||
@@ -221,18 +230,18 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
}
|
||||
n := len(samples) / 2
|
||||
wr.Timeseries[0].Samples = samples[:n]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples[n:]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries[0].Samples = samples
|
||||
return
|
||||
}
|
||||
timeseries := wr.Timeseries
|
||||
n := len(timeseries) / 2
|
||||
wr.Timeseries = timeseries[:n]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries[n:]
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
wr.Timeseries = timeseries
|
||||
}
|
||||
|
||||
|
||||
@@ -2,40 +2,48 @@ package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestPushWriteRequest(t *testing.T) {
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
rowsCounts := []int{1, 10, 100, 1e3, 1e4}
|
||||
expectedBlockLensProm := []int{216, 1848, 16424, 169882, 1757876}
|
||||
expectedBlockLensVM := []int{138, 492, 3927, 34995, 288476}
|
||||
for i, rowsCount := range rowsCounts {
|
||||
expectedBlockLenProm := expectedBlockLensProm[i]
|
||||
expectedBlockLenVM := expectedBlockLensVM[i]
|
||||
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
|
||||
testPushWriteRequest(t, rowsCount)
|
||||
testPushWriteRequest(t, rowsCount, expectedBlockLenProm, expectedBlockLenVM)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testPushWriteRequest(t *testing.T, rowsCount int) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expectedBlockLenVM int) {
|
||||
f := func(isVMRemoteWrite bool, expectedBlockLen int, tolerancePrc float64) {
|
||||
t.Helper()
|
||||
wr := newTestWriteRequest(rowsCount, 20)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
|
||||
if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
|
||||
t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
|
||||
rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
b := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
zb := snappy.Encode(nil, b)
|
||||
maxPushBlockLen := len(zb)
|
||||
minPushBlockLen := maxPushBlockLen / 2
|
||||
if pushBlockLen < minPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be at least %d bytes", pushBlockLen, minPushBlockLen)
|
||||
}
|
||||
if pushBlockLen > maxPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be smaller or equal to %d bytes", pushBlockLen, maxPushBlockLen)
|
||||
}
|
||||
|
||||
// Check Prometheus remote write
|
||||
f(false, expectedBlockLenProm, 0)
|
||||
|
||||
// Check VictoriaMetrics remote write
|
||||
f(true, expectedBlockLenVM, 15)
|
||||
}
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompbmarshal.WriteRequest {
|
||||
|
||||
@@ -15,11 +15,13 @@ import (
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. "+
|
||||
"The path can point either to local file or to http url. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url. "+
|
||||
"The path can point either to local file or to http url")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
|
||||
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
|
||||
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
|
||||
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
|
||||
@@ -86,7 +88,7 @@ func initLabelsGlobal() {
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
|
||||
49
app/vmagent/remotewrite/relabel_test.go
Normal file
49
app/vmagent/remotewrite/relabel_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestApplyRelabeling(t *testing.T) {
|
||||
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
|
||||
rctx := &relabelCtx{}
|
||||
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
|
||||
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
|
||||
if !reflect.DeepEqual(gotTss, expTss) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, nil, "up", "up")
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
- action: labeldrop
|
||||
regex: "env.*"
|
||||
`))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
|
||||
|
||||
oldVal := *usePromCompatibleNaming
|
||||
*usePromCompatibleNaming = true
|
||||
f(nil, nil, `foo.bar`, `foo_bar`)
|
||||
*usePromCompatibleNaming = oldVal
|
||||
}
|
||||
|
||||
func parseSeries(data string) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, prompbmarshal.TimeSeries{
|
||||
Labels: promutils.MustNewLabelsFromString(data).GetLabels(),
|
||||
})
|
||||
return tss
|
||||
}
|
||||
@@ -21,18 +21,22 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
|
||||
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL")
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
|
||||
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. See also -remoteWrite.multitenantURL")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
forcePromProto = flagutil.NewArrayBool("remoteWrite.forcePromProto", "Whether to force Prometheus remote write protocol for sending data "+
|
||||
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
@@ -58,6 +62,15 @@ var (
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. "+
|
||||
"By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", "Input samples are de-duplicated with this interval before being aggregated. "+
|
||||
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -140,6 +153,7 @@ func Init() {
|
||||
logger.Fatalf("cannot load relabel configs: %s", err)
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
@@ -435,9 +449,13 @@ var (
|
||||
)
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
sas *streamaggr.Aggregators
|
||||
streamAggrKeepInput bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
@@ -460,15 +478,28 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
|
||||
// Auto-detect whether the remote storage supports VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite := false
|
||||
usePromProto := forcePromProto.GetOptionalArg(argIdx)
|
||||
if !usePromProto {
|
||||
isVMRemoteWrite = common.HandleVMProtoClientHandshake(remoteWriteURL)
|
||||
if !isVMRemoteWrite {
|
||||
logger.Infof("the remote storage at %q doesn't support VictoriaMetrics remote write protocol. Switching to Prometheus remote write protocol. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol", sanitizedURL)
|
||||
}
|
||||
}
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues, isVMRemoteWrite)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pssLen := *queues
|
||||
@@ -479,9 +510,10 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
}
|
||||
pss := make([]*pendingSeries, pssLen)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, isVMRemoteWrite, sf, rd)
|
||||
}
|
||||
return &remoteWriteCtx{
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
@@ -490,6 +522,20 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
}
|
||||
|
||||
// Initialize sas
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(argIdx, 0)
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggrFile=%q: %s", sasFile, err)
|
||||
}
|
||||
rwctx.sas = sas
|
||||
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(argIdx)
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) MustStop() {
|
||||
@@ -501,6 +547,8 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
rwctx.sas.MustStop()
|
||||
rwctx.sas = nil
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
|
||||
@@ -509,6 +557,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
@@ -526,11 +575,17 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
|
||||
}
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
rowsCount := getRowsCount(tss)
|
||||
rwctx.rowsPushedAfterRelabel.Add(rowsCount)
|
||||
pss[idx].Push(tss)
|
||||
|
||||
// Apply stream aggregation if any
|
||||
rwctx.sas.Push(tss)
|
||||
if rwctx.sas == nil || rwctx.streamAggrKeepInput {
|
||||
// Push samples to the remote storage
|
||||
rwctx.pushInternal(tss)
|
||||
}
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
@@ -538,6 +593,12 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package vmimport
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
@@ -12,8 +11,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -31,20 +30,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
})
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return stream.Parse(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,13 @@ protocol and require `-remoteWrite.url` to be configured.
|
||||
Vmalert is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
|
||||
implementation and aims to be compatible with its syntax.
|
||||
|
||||
A [single-node](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmalert)
|
||||
or [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmalert)
|
||||
of VictoriaMetrics is capable of proxying requests to vmalert via `-vmalert.proxyURL` command-line flag.
|
||||
Use this feature for the following cases:
|
||||
* for proxying requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* for accessing vmalert's UI through VictoriaMetrics Web interface.
|
||||
|
||||
## Features
|
||||
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
|
||||
@@ -21,7 +28,8 @@ implementation and aims to be compatible with its syntax.
|
||||
* Graphite datasource can be used for alerting and recording rules. See [these docs](#graphite);
|
||||
* Recording and Alerting rules backfilling (aka `replay`). See [these docs](#rules-backfilling);
|
||||
* Lightweight and without extra dependencies.
|
||||
* Supports [reusable templates](#reusable-templates) for annotations.
|
||||
* Supports [reusable templates](#reusable-templates) for annotations;
|
||||
* Load of recording and alerting rules from local filesystem, GCS and S3.
|
||||
|
||||
## Limitations
|
||||
|
||||
@@ -69,16 +77,17 @@ Then configure `vmalert` accordingly:
|
||||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
Note there's a separate `remoteWrite.url` to allow writing results of
|
||||
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
|
||||
alerting/recording rules into a different storage than the initial data that's
|
||||
queried. This allows using `vmalert` to aggregate data from a short-term,
|
||||
high-frequency, high-cardinality storage into a long-term storage with
|
||||
decreased cardinality and a bigger interval between samples.
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
@@ -191,6 +200,11 @@ expr: <string>
|
||||
# Is applicable to alerting rules only.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
|
||||
# Labels to add or overwrite for each alert.
|
||||
labels:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
@@ -319,6 +333,12 @@ expr: <string>
|
||||
# Labels to add or overwrite before storing the result.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
@@ -385,6 +405,25 @@ The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz`
|
||||
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) and in `*-enterprise`
|
||||
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags).
|
||||
|
||||
### Reading rules from object storage
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/enterprise.html) of `vmalert` may read alerting and recording rules
|
||||
from object storage:
|
||||
|
||||
- `./bin/vmalert -rule=s3://bucket/dir/alert.rules` would read rules from the given path at S3 bucket
|
||||
- `./bin/vmalert -rule=gs://bucket/dir/alert.rules` would read rules from the given path at GCS bucket
|
||||
|
||||
S3 and GCS paths support only matching by prefix, e.g. `s3://bucket/dir/rule_` matches
|
||||
all files with prefix `rule_` in the folder `dir`.
|
||||
|
||||
The following [command-line flags](#flags) can be used for fine-tuning access to S3 and GCS:
|
||||
|
||||
- `-s3.credsFilePath` - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
- `-s3.configFilePath` - path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
- `-s3.configProfile` - profile name for S3 configs. If not set, the value of the environment variable will be loaded (`AWS_PROFILE` or `AWS_DEFAULT_PROFILE`).
|
||||
- `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
- `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
|
||||
### Topology examples
|
||||
|
||||
The following sections are showing how `vmalert` may be used and configured
|
||||
@@ -469,7 +508,8 @@ Alertmanager will automatically deduplicate alerts with identical labels, so ens
|
||||
all `vmalert`s are having the same config.
|
||||
|
||||
Don't forget to configure [cluster mode](https://prometheus.io/docs/alerting/latest/alertmanager/)
|
||||
for Alertmanagers for better reliability.
|
||||
for Alertmanagers for better reliability. List all Alertmanager URLs in vmalert's `-notifier.url`
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
|
||||
This example uses single-node VM server for the sake of simplicity.
|
||||
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
|
||||
@@ -502,8 +542,8 @@ groups:
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
|
||||
reading data from one data source and backfilling results to another. This helps to build a system
|
||||
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
|
||||
allows reading data from one data source and backfilling results to another. This helps to build a system
|
||||
for aggregating and downsampling the data.
|
||||
|
||||
The following example shows how to build a topology where `vmalert` will process data from one cluster
|
||||
@@ -527,7 +567,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
|
||||
|
||||
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
|
||||
|
||||
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
|
||||
@@ -581,6 +621,8 @@ can read the same rules configuration as normal, evaluate them on the given time
|
||||
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
|
||||
data source for backfilling.
|
||||
|
||||
See a blogpost about [Rules backfilling via vmalert](https://victoriametrics.com/blog/rules-replay/).
|
||||
|
||||
### How it works
|
||||
|
||||
In `replay` mode vmalert works as a cli-tool and exits immediately after work is done.
|
||||
@@ -678,38 +720,49 @@ The default list of alerting rules for these metric can be found [here](https://
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
|
||||
metrics may be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert/) for `vmalert` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
vmalert executes configured rules within certain intervals. It is expected that at the moment when rule is executed,
|
||||
the data is already present in configured `-datasource.url`:
|
||||
### Data delay
|
||||
|
||||
Data delay is one of the most common issues with rules execution.
|
||||
vmalert executes configured rules within certain intervals at specific timestamps.
|
||||
It expects that the data is already present in configured `-datasource.url` at the moment of time when rule is executed:
|
||||
|
||||
<img alt="vmalert expected evaluation" src="vmalert_ts_normal.gif">
|
||||
|
||||
Usually, troubles start to appear when data in `-datasource.url` is delayed or absent. In such cases, evaluations
|
||||
may get empty response from datasource and produce empty recording rules or reset alerts state:
|
||||
may get empty response from the datasource and produce empty recording rules or reset alerts state:
|
||||
|
||||
<img alt="vmalert evaluation when data is delayed" src="vmalert_ts_data_delay.gif">
|
||||
|
||||
By default recently written samples to VictoriaMetrics aren't visible for queries for up to 30s.
|
||||
This behavior is controlled by `-search.latencyOffset` command-line flag and the `latency_offset` query arg at `vmselect`.
|
||||
Usually, this results into a 30s shift for recording rules results.
|
||||
Note that too small value passed to `-search.latencyOffset` or to `latency_offset` query arg may lead to incomplete query results.
|
||||
Try the following recommendations to reduce the chance of hitting the data delay issue:
|
||||
|
||||
Try the following recommendations in such cases:
|
||||
|
||||
* Always configure group's `evaluationInterval` to be bigger or equal to `scrape_interval` at which metrics
|
||||
are delivered to the datasource;
|
||||
* Always configure group's `evaluationInterval` to be bigger or at least equal to
|
||||
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution);
|
||||
* Ensure that `[duration]` value is at least twice bigger than
|
||||
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution). For example,
|
||||
if expression is `rate(my_metric[2m]) > 0` then ensure that `my_metric` resolution is at least `1m` or better `30s`.
|
||||
If you use VictoriaMetrics as datasource, `[duration]` can be omitted and VictoriaMetrics will adjust it automatically.
|
||||
* If you know in advance, that data in datasource is delayed - try changing vmalert's `-datasource.lookback`
|
||||
command-line flag to add a time shift for evaluations;
|
||||
* If time intervals between datapoints in datasource are irregular or `>=5min` - try changing vmalert's
|
||||
`-datasource.queryStep` command-line flag to specify how far search query can lookback for the recent datapoint.
|
||||
The recommendation is to have the step at least two times bigger than `scrape_interval`, since
|
||||
there are no guarantees that scrape will not fail.
|
||||
command-line flag to add a time shift for evaluations. Or extend `[duration]` to tolerate the delay.
|
||||
For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
|
||||
* If [time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution)
|
||||
in datasource is inconsistent or `>=5min` - try changing vmalert's `-datasource.queryStep` command-line flag to specify
|
||||
how far search query can lookback for the recent datapoint. The recommendation is to have the step
|
||||
at least two times bigger than the resolution.
|
||||
|
||||
> Please note, data delay is inevitable in distributed systems. It is better to account for it than to ignore it.
|
||||
|
||||
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s
|
||||
(see `-search.latencyOffset` command-line flag at vmselect). Such delay is needed to eliminate risk of incomplete
|
||||
data at the moment of querying, since metrics collectors won't be able to deliver the data in time.
|
||||
|
||||
### Alerts state
|
||||
|
||||
Sometimes, it is not clear why some specific alert fired or didn't fire. It is very important to remember, that
|
||||
alerts with `for: 0` fire immediately when their expression becomes true. And alerts with `for > 0` will fire only
|
||||
@@ -721,8 +774,9 @@ If `-remoteWrite.url` command-line flag is configured, vmalert will persist aler
|
||||
[vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state
|
||||
changed in time.
|
||||
|
||||
vmalert also stores last N state updates for each rule. To check updates, click on `Details` link next to rule's name
|
||||
on `/vmalert/groups` page and check the `Last updates` section:
|
||||
vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules))
|
||||
state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page
|
||||
and check the `Last updates` section:
|
||||
|
||||
<img alt="vmalert state" src="vmalert_state.png">
|
||||
|
||||
@@ -731,7 +785,9 @@ HTTP request sent by vmalert to the `-datasource.url` during evaluation. If spec
|
||||
no samples returned and curl command returns data - then it is very likely there was no data in datasource on the
|
||||
moment when rule was evaluated.
|
||||
|
||||
vmalert also allows configuring more detailed logging for specific rule. Just set `debug: true` in rule's configuration
|
||||
### Debug mode
|
||||
|
||||
vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration
|
||||
and vmalert will start printing additional log messages:
|
||||
```terminal
|
||||
2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms)
|
||||
@@ -883,13 +939,21 @@ The shortlist of configuration flags is the following:
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
Address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8880")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -898,6 +962,8 @@ The shortlist of configuration flags is the following:
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
@@ -955,7 +1021,7 @@ The shortlist of configuration flags is the following:
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-notifier.url array
|
||||
Prometheus alertmanager URL, e.g. http://127.0.0.1:9093
|
||||
Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
@@ -992,7 +1058,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteRead.headers string
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteRead.url. For example, -remoteRead.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteRead.url. Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'
|
||||
-remoteRead.ignoreRestoreErrors
|
||||
Whether to ignore errors from remote storage when restoring alerts state on startup. (default true)
|
||||
Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases. (default true)
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.oauth2.clientID string
|
||||
@@ -1069,8 +1135,8 @@ The shortlist of configuration flags is the following:
|
||||
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.
|
||||
-replay.disableProgressBar
|
||||
Whether to disable rendering progress bars during the replay. Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.
|
||||
-replay.maxDatapointsPerQuery int
|
||||
Max number of data points expected in one request. The higher the value, the less requests will be made during replay. (default 1000)
|
||||
-replay.maxDatapointsPerQuery int
|
||||
Max number of data points expected in one request. It affects the max time range for every /query_range request during the replay. The higher the value, the less requests will be made during replay. (default 1000)
|
||||
-replay.ruleRetryAttempts int
|
||||
Defines how many retries to make before giving up on rule if request for it returns an error. (default 5)
|
||||
-replay.rulesDelay duration
|
||||
@@ -1080,18 +1146,24 @@ The shortlist of configuration flags is the following:
|
||||
-replay.timeTo string
|
||||
The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
-rule array
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Path to the files with alerting and/or recording rules.
|
||||
Supports hierarchical patterns and regexpes.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
-rule="dir/*.yaml" -rule="/*.yaml" -rule="gcs://vmalert-rules/tenant_%{TENANT_ID}/prod".
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
|
||||
Enterprise version of vmalert supports S3 and GCS paths to rules.
|
||||
For example: gs://bucket/path/to/rules, s3://bucket/path/to/rules
|
||||
S3 and GCS paths support only matching by prefix, e.g. s3://bucket/dir/rule_ matches
|
||||
all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.configCheckInterval duration
|
||||
Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead
|
||||
-rule.maxResolveDuration duration
|
||||
Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group.
|
||||
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group.
|
||||
-rule.resendDelay duration
|
||||
Minimum amount of time to wait before resending an alert to notifier
|
||||
-rule.templates array
|
||||
@@ -1102,10 +1174,24 @@ The shortlist of configuration flags is the following:
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
absolute path to all .tpl files in root.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.updateEntriesLimit int
|
||||
Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param. (default 20)
|
||||
-rule.validateExpressions
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
-rule.validateTemplates
|
||||
Whether to validate annotation and label templates (default true)
|
||||
-s3.configFilePath string
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.configProfile string
|
||||
Profile name for S3 configs. If not set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both are not set, DefaultSharedConfigProfile is used. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.customEndpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
-s3.forcePathStyle
|
||||
Prefixing endpoint with bucket name when set false, true by default. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default true)
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
@@ -1185,6 +1271,8 @@ dns_sd_configs:
|
||||
```
|
||||
|
||||
The list of configured or discovered Notifiers can be explored via [UI](#Web).
|
||||
If Alertmanager runs in cluster mode then all its URLs need to be available during discovery
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
|
||||
The configuration file [specification](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/config.go)
|
||||
is the following:
|
||||
|
||||
@@ -74,10 +74,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
Debug: cfg.Debug,
|
||||
}),
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
@@ -416,7 +421,9 @@ func (ar *AlertingRule) UpdateWith(r Rule) error {
|
||||
ar.Labels = nr.Labels
|
||||
ar.Annotations = nr.Annotations
|
||||
ar.EvalInterval = nr.EvalInterval
|
||||
ar.Debug = nr.Debug
|
||||
ar.q = nr.q
|
||||
ar.state = nr.state
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -491,7 +498,9 @@ func (ar *AlertingRule) ToAPI() APIRule {
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
MaxUpdates: ar.state.size(),
|
||||
Updates: ar.state.getAll(),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
@@ -598,54 +607,59 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
|
||||
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
||||
}
|
||||
|
||||
// Restore restores the state of active alerts basing on previously written time series.
|
||||
// Restore restores only ActiveAt field. Field State will be always Pending and supposed
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
// Only rules with For > 0 will be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("querier is nil")
|
||||
// Restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ts := time.Now()
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
ar.alertsMu.Lock()
|
||||
defer ar.alertsMu.Unlock()
|
||||
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
for k, v := range labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range qMetrics {
|
||||
ls := &labelSet{
|
||||
origin: make(map[string]string, len(m.Labels)),
|
||||
processed: make(map[string]string, len(m.Labels)),
|
||||
for _, a := range ar.alerts {
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
for _, l := range m.Labels {
|
||||
if l.Name == "__name__" {
|
||||
continue
|
||||
}
|
||||
ls.origin[l.Name] = l.Value
|
||||
ls.processed[l.Name] = l.Value
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, time.Unix(int64(m.Values[0]), 0), qFn)
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
qMetrics, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create alert: %w", err)
|
||||
return err
|
||||
}
|
||||
a.ID = hash(ls.processed)
|
||||
a.State = notifier.StatePending
|
||||
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.Restored = true
|
||||
ar.alerts[a.ID] = a
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -6,12 +6,15 @@ import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
||||
@@ -502,118 +505,156 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Restore(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
metrics []datasource.Metric
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestRuleWithLabels("no extra labels"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(nil): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("metric labels"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
alertNameLabel, "metric labels",
|
||||
alertGroupNameLabel, "groupID",
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "metric labels",
|
||||
alertGroupNameLabel: "groupID",
|
||||
"foo": "bar",
|
||||
"namespace": "baz",
|
||||
}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("rule labels", "source", "vm"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"foo", "bar",
|
||||
"namespace", "baz",
|
||||
// extra labels set by rule
|
||||
"source", "vm",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
"foo": "bar",
|
||||
"namespace": "baz",
|
||||
"source": "vm",
|
||||
}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multiple alerts"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-1",
|
||||
),
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(2*time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-2",
|
||||
),
|
||||
metricWithValueAndLabels(t, float64(time.Now().Truncate(3*time.Hour).Unix()),
|
||||
"__name__", alertForStateMetricName,
|
||||
"host", "localhost-3",
|
||||
),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{"host": "localhost-1"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(time.Hour)},
|
||||
hash(map[string]string{"host": "localhost-2"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(2 * time.Hour)},
|
||||
hash(map[string]string{"host": "localhost-3"}): {State: notifier.StatePending,
|
||||
ActiveAt: time.Now().Truncate(3 * time.Hour)},
|
||||
},
|
||||
},
|
||||
func TestGroup_Restore(t *testing.T) {
|
||||
defaultTS := time.Now()
|
||||
fqr := &fakeQuerierWithRegistry{}
|
||||
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
defer fqr.reset()
|
||||
|
||||
for _, r := range rules {
|
||||
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
}
|
||||
|
||||
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
|
||||
fg.start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
fg.close()
|
||||
wg.Wait()
|
||||
|
||||
gotAlerts := make(map[uint64]*notifier.Alert)
|
||||
for _, rs := range fg.Rules {
|
||||
alerts := rs.(*AlertingRule).alerts
|
||||
for k, v := range alerts {
|
||||
if !v.Restored {
|
||||
// set not restored alerts to predictable timestamp
|
||||
v.ActiveAt = defaultTS
|
||||
}
|
||||
gotAlerts[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if len(gotAlerts) != len(expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(expAlerts), len(gotAlerts))
|
||||
}
|
||||
for key, exp := range expAlerts {
|
||||
got, ok := gotAlerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != notifier.StatePending {
|
||||
t.Fatalf("expected state %d; got %d", notifier.StatePending, got.State)
|
||||
}
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
}
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
fq.add(tc.metrics...)
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour, nil); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
||||
}
|
||||
for key, exp := range tc.expAlerts {
|
||||
got, ok := tc.rule.alerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != exp.State {
|
||||
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
||||
}
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
}
|
||||
|
||||
stateMetric := func(name string, value time.Time, labels ...string) datasource.Metric {
|
||||
labels = append(labels, "__name__", alertForStateMetricName)
|
||||
labels = append(labels, alertNameLabel, name)
|
||||
labels = append(labels, alertGroupNameLabel, "TestRestore")
|
||||
return metricWithValueAndLabels(t, float64(value.Unix()), labels...)
|
||||
}
|
||||
|
||||
// one active alert, no previous state
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
fqr.reset()
|
||||
|
||||
// one active alert with state restore
|
||||
ts := time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, two with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: ts},
|
||||
})
|
||||
|
||||
// one active alert but wrong state restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
stateMetric("wrong alert", ts))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with labels
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
|
||||
// one active alert with restore labels missmatch
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
@@ -709,7 +750,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
@@ -749,7 +789,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||
@@ -789,7 +828,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1,
|
||||
@@ -820,6 +858,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
@@ -936,6 +975,6 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
For: waitFor,
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,8 +5,6 @@ import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
@@ -114,6 +112,9 @@ type Rule struct {
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
|
||||
// Overrides `-rule.updateEntriesLimit`.
|
||||
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
@@ -200,19 +201,15 @@ type ValidateTplFn func(annotations map[string]string) error
|
||||
|
||||
// Parse parses rule configs from given file patterns
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
var fp []string
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
errGroup := new(utils.ErrGroup)
|
||||
var groups []Group
|
||||
for _, file := range fp {
|
||||
for file, data := range files {
|
||||
uniqueGroups := map[string]struct{}{}
|
||||
gr, err := parseFile(file)
|
||||
gr, err := parseConfig(data)
|
||||
if err != nil {
|
||||
errGroup.Add(fmt.Errorf("failed to parse file %q: %w", file, err))
|
||||
continue
|
||||
@@ -240,14 +237,10 @@ func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressio
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parseFile(path string) ([]Group, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
func parseConfig(data []byte) ([]Group, error) {
|
||||
data, err := envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file %q: %w", path, err)
|
||||
}
|
||||
data, err = envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars in %q: %w", path, err)
|
||||
return nil, fmt.Errorf("cannot expand environment vars: %w", err)
|
||||
}
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
|
||||
@@ -550,6 +550,20 @@ rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
debug: true
|
||||
`)
|
||||
})
|
||||
t.Run("`update_entries_limit` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
update_entries_limit: 33
|
||||
`)
|
||||
})
|
||||
}
|
||||
|
||||
89
app/vmalert/config/fs.go
Normal file
89
app/vmalert/config/fs.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/fslocal"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// FS is an abstraction of a file system that files can be read from.
type FS interface {
	// Init initializes the FS.
	Init() error

	// Read returns the files that were read as a map:
	// the key is the file name and the value is the content of that file.
	// Read must be called only after a successful Init call.
	Read() (map[string][]byte, error)

	// String must return a human-readable representation of the FS.
	String() string
}
|
||||
|
||||
var (
|
||||
fsRegistryMu sync.Mutex
|
||||
fsRegistry = make(map[string]FS)
|
||||
)
|
||||
|
||||
// readFromFS parses the given path list and inits FS for each item.
|
||||
// Once inited, readFromFS will try to read and return files from each FS.
|
||||
// readFromFS returns an error if at least one FS failed to init.
|
||||
// The function can be called multiple times but each unique path
|
||||
// will be inited only once.
|
||||
//
|
||||
// It is allowed to mix different FS types in path list.
|
||||
func readFromFS(paths []string) (map[string][]byte, error) {
|
||||
var err error
|
||||
result := make(map[string][]byte)
|
||||
for _, path := range paths {
|
||||
|
||||
fsRegistryMu.Lock()
|
||||
fs, ok := fsRegistry[path]
|
||||
if !ok {
|
||||
fs, err = newFS(path)
|
||||
if err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while parsing path %q: %w", path, err)
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
fsRegistryMu.Unlock()
|
||||
return nil, fmt.Errorf("error while initializing path %q: %w", path, err)
|
||||
}
|
||||
fsRegistry[path] = fs
|
||||
}
|
||||
fsRegistryMu.Unlock()
|
||||
|
||||
files, err := fs.Read()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading files from %q: %w", fs, err)
|
||||
}
|
||||
for k, v := range files {
|
||||
if _, ok := result[k]; ok {
|
||||
return nil, fmt.Errorf("duplicate found for file name %q: file names must be unique", k)
|
||||
}
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// newFS creates FS based on the give path.
|
||||
// Supported file systems are: fs
|
||||
func newFS(path string) (FS, error) {
|
||||
scheme := "fs"
|
||||
n := strings.Index(path, "://")
|
||||
if n >= 0 {
|
||||
scheme = path[:n]
|
||||
path = path[n+len("://"):]
|
||||
}
|
||||
if len(path) == 0 {
|
||||
return nil, fmt.Errorf("path cannot be empty")
|
||||
}
|
||||
switch scheme {
|
||||
case "fs":
|
||||
return &fslocal.FS{Pattern: path}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported scheme %q", scheme)
|
||||
}
|
||||
}
|
||||
39
app/vmalert/config/fs_test.go
Normal file
39
app/vmalert/config/fs_test.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewFS(t *testing.T) {
|
||||
f := func(path, expStr string) {
|
||||
t.Helper()
|
||||
fs, err := newFS(path)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if fs.String() != expStr {
|
||||
t.Fatalf("expected FS %q; got %q", expStr, fs.String())
|
||||
}
|
||||
}
|
||||
|
||||
f("/foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
f("fs:///foo/bar", "Local FS{MatchPattern: \"/foo/bar\"}")
|
||||
}
|
||||
|
||||
func TestNewFSNegative(t *testing.T) {
|
||||
f := func(path, expErr string) {
|
||||
t.Helper()
|
||||
_, err := newFS(path)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to have err: %s", expErr)
|
||||
}
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to have err %q; got %q instead", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "path cannot be empty")
|
||||
f("fs://", "path cannot be empty")
|
||||
f("foobar://baz", `unsupported scheme "foobar"`)
|
||||
}
|
||||
44
app/vmalert/config/fslocal/fslocal.go
Normal file
44
app/vmalert/config/fslocal/fslocal.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package fslocal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// FS represents a local file system
type FS struct {
	// Pattern is used for matching one or multiple files.
	// The pattern may describe hierarchical names such as
	// /usr/*/bin/ed (assuming the Separator is '/').
	Pattern string
}

// Init verifies that configured Pattern is correct.
// It returns the error reported by filepath.Glob for the Pattern, if any;
// the matched files themselves are not used here.
func (fs *FS) Init() error {
	_, err := filepath.Glob(fs.Pattern)
	return err
}

// String implements Stringer interface
func (fs *FS) String() string {
	return fmt.Sprintf("Local FS{MatchPattern: %q}", fs.Pattern)
}

// Read returns a map of read files where
// key is the file name and value is file's content.
// The file names are the paths returned by filepath.Glob for Pattern.
// Read fails if the Pattern cannot be matched or if any matched path
// cannot be read; no partial result is returned in that case.
func (fs *FS) Read() (map[string][]byte, error) {
	matches, err := filepath.Glob(fs.Pattern)
	if err != nil {
		return nil, fmt.Errorf("error while matching files via pattern %s: %w", fs.Pattern, err)
	}
	// The number of entries is known upfront, so size the map once.
	result := make(map[string][]byte, len(matches))
	for _, path := range matches {
		data, err := os.ReadFile(path)
		if err != nil {
			return nil, fmt.Errorf("error while reading file %q: %w", path, err)
		}
		result[path] = data
	}
	return result, nil
}
|
||||
@@ -12,6 +12,7 @@ groups:
|
||||
expr: vm_tcplistener_conns > 0
|
||||
for: 3m
|
||||
debug: true
|
||||
update_entries_limit: 40
|
||||
annotations:
|
||||
labels: "Available labels: {{ $labels }}"
|
||||
summary: Too high connection number for {{ $labels.instance }}
|
||||
@@ -20,6 +21,7 @@ groups:
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
update_entries_limit: -1
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
labels:
|
||||
|
||||
@@ -7,6 +7,7 @@ groups:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
|
||||
@@ -72,6 +72,15 @@ func (m *Metric) AddLabel(key, value string) {
|
||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
}
|
||||
|
||||
// DelLabel deletes the given label from the label set
|
||||
func (m *Metric) DelLabel(key string) {
|
||||
for i, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
m.Labels = append(m.Labels[:i], m.Labels[i+1:]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Label returns the given label value.
|
||||
// If label is missing empty string will be returned
|
||||
func (m *Metric) Label(key string) string {
|
||||
|
||||
@@ -174,7 +174,7 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
}
|
||||
|
||||
func (s *VMStorage) newRequestPOST() (*http.Request, error) {
|
||||
req, err := http.NewRequest("POST", s.datasourceURL, nil)
|
||||
req, err := http.NewRequest(http.MethodPost, s.datasourceURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -158,23 +158,23 @@ func (g *Group) ID() uint64 {
|
||||
}
|
||||
|
||||
// Restore restores alerts state for group rules
|
||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, lookback time.Duration, labels map[string]string) error {
|
||||
labels = mergeLabels(g.Name, "", labels, g.Labels)
|
||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
rr, ok := rule.(*AlertingRule)
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if rr.For < 1 {
|
||||
if ar.For < 1 {
|
||||
continue
|
||||
}
|
||||
// ignore QueryParams on purpose, because they could contain
|
||||
// query filters. This may affect the restore procedure.
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: g.Type.String(),
|
||||
Headers: g.Headers,
|
||||
DataSourceType: g.Type.String(),
|
||||
EvaluationInterval: g.Interval,
|
||||
QueryParams: g.Params,
|
||||
Headers: g.Headers,
|
||||
Debug: ar.Debug,
|
||||
})
|
||||
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
|
||||
if err := ar.Restore(ctx, q, ts, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
}
|
||||
@@ -251,7 +251,7 @@ func (g *Group) close() {
|
||||
|
||||
var skipRandSleepOnGroupStart bool
|
||||
|
||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client) {
|
||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
|
||||
e := &executor{
|
||||
@@ -259,26 +259,6 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
notifiers: nts,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label)}
|
||||
|
||||
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
|
||||
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += uint64(g.Interval)
|
||||
}
|
||||
randSleep -= sleepOffset
|
||||
sleepTimer := time.NewTimer(time.Duration(randSleep))
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-g.doneCh:
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-sleepTimer.C:
|
||||
}
|
||||
}
|
||||
|
||||
evalTS := time.Now()
|
||||
|
||||
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
@@ -309,6 +289,16 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
||||
@@ -209,7 +209,7 @@ func TestGroupStart(t *testing.T) {
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil)
|
||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
@@ -460,7 +460,7 @@ func TestFaultyRW(t *testing.T) {
|
||||
|
||||
r := &RecordingRule{
|
||||
Name: "test",
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
q: fq,
|
||||
}
|
||||
|
||||
|
||||
@@ -61,6 +61,49 @@ func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) ([]dataso
|
||||
return cp, req, nil
|
||||
}
|
||||
|
||||
type fakeQuerierWithRegistry struct {
|
||||
sync.Mutex
|
||||
registry map[string][]datasource.Metric
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) set(key string, metrics ...datasource.Metric) {
|
||||
fqr.Lock()
|
||||
if fqr.registry == nil {
|
||||
fqr.registry = make(map[string][]datasource.Metric)
|
||||
}
|
||||
fqr.registry[key] = metrics
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) reset() {
|
||||
fqr.Lock()
|
||||
fqr.registry = nil
|
||||
fqr.Unlock()
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
||||
return fqr
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) ([]datasource.Metric, error) {
|
||||
req, _, err := fqr.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
}
|
||||
|
||||
func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) ([]datasource.Metric, *http.Request, error) {
|
||||
fqr.Lock()
|
||||
defer fqr.Unlock()
|
||||
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
metrics, ok := fqr.registry[expr]
|
||||
if !ok {
|
||||
return nil, req, nil
|
||||
}
|
||||
cp := make([]datasource.Metric, len(metrics))
|
||||
copy(cp, metrics)
|
||||
return cp, req, nil
|
||||
}
|
||||
|
||||
type fakeNotifier struct {
|
||||
sync.Mutex
|
||||
alerts []notifier.Alert
|
||||
|
||||
@@ -28,13 +28,19 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
rulePath = flagutil.NewArrayString("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
rulePath = flagutil.NewArrayString("rule", `Path to the files with alerting and/or recording rules.
|
||||
Supports hierarchical patterns and regexpes.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
-rule="dir/*.yaml" -rule="/*.yaml". Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
-rule="dir/*.yaml" -rule="/*.yaml" -rule="gcs://vmalert-rules/tenant_%{TENANT_ID}/prod".
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
|
||||
Enterprise version of vmalert supports S3 and GCS paths to rules.
|
||||
For example: gs://bucket/path/to/rules, s3://bucket/path/to/rules
|
||||
S3 and GCS paths support only matching by prefix, e.g. s3://bucket/dir/rule_ matches
|
||||
all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
`)
|
||||
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
@@ -49,14 +55,18 @@ absolute path to all .tpl files in root.`)
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
|
||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
"which is by default equal to 3 evaluation intervals of the parent group.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
"which by default is 4 times evaluationInterval of the parent group.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overriden per rule via update_entries_limit param.")
|
||||
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
|
||||
@@ -69,7 +79,7 @@ absolute path to all .tpl files in root.`)
|
||||
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup.")
|
||||
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
|
||||
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
|
||||
@@ -90,6 +100,10 @@ func main() {
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
logger.Warnf("flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases.")
|
||||
}
|
||||
|
||||
err := templates.Load(*ruleTemplatesPath, true)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to parse %q: %s", *ruleTemplatesPath, err)
|
||||
@@ -168,7 +182,7 @@ func main() {
|
||||
go configReload(ctx, manager, groupsCfg, sighupCh)
|
||||
|
||||
rh := &requestHandler{m: manager}
|
||||
go httpserver.Serve(*httpListenAddr, rh.handler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, rh.handler)
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"net/url"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
@@ -82,24 +83,38 @@ func (m *manager) close() {
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) error {
|
||||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
return fmt.Errorf("failed to restore ruleState for group %q: %w", group.Name, err)
|
||||
}
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", group.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error {
|
||||
m.wg.Add(1)
|
||||
id := group.ID()
|
||||
id := g.ID()
|
||||
go func() {
|
||||
group.start(ctx, m.notifiers, m.rw)
|
||||
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
|
||||
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += uint64(g.Interval)
|
||||
}
|
||||
randSleep -= sleepOffset
|
||||
sleepTimer := time.NewTimer(time.Duration(randSleep))
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-g.doneCh:
|
||||
sleepTimer.Stop()
|
||||
return
|
||||
case <-sleepTimer.C:
|
||||
}
|
||||
}
|
||||
if restore {
|
||||
g.start(ctx, m.notifiers, m.rw, m.rr)
|
||||
} else {
|
||||
g.start(ctx, m.notifiers, m.rw, nil)
|
||||
}
|
||||
|
||||
m.wg.Done()
|
||||
}()
|
||||
m.groups[id] = group
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -64,17 +64,18 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := rand.Intn(len(paths))
|
||||
rnd := r.Intn(len(paths))
|
||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||
if err != nil { // update can fail and this is expected
|
||||
continue
|
||||
}
|
||||
_ = m.update(context.Background(), cfg, false)
|
||||
}
|
||||
}()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -41,7 +41,7 @@ type Alert struct {
|
||||
LastSent time.Time
|
||||
// Value stores the value returned from evaluating expression from Expr field
|
||||
Value float64
|
||||
// ID is the unique identifer for the Alert
|
||||
// ID is the unique identifier for the Alert
|
||||
ID uint64
|
||||
// Restored is true if Alert was restored after restart
|
||||
Restored bool
|
||||
|
||||
@@ -64,7 +64,7 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert) error {
|
||||
b := &bytes.Buffer{}
|
||||
writeamRequest(b, alerts, am.argFunc, am.relabelConfigs)
|
||||
|
||||
req, err := http.NewRequest("POST", am.addr, b)
|
||||
req, err := http.NewRequest(http.MethodPost, am.addr, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ type Config struct {
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||
// DNSSDConfigs ontains list of settings for service discovery via DNS.
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||
|
||||
|
||||
@@ -162,14 +162,15 @@ consul_sd_configs:
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := rand.Intn(len(paths))
|
||||
rnd := r.Intn(len(paths))
|
||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||
_ = cw.notifiers()
|
||||
}
|
||||
}()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@ var (
|
||||
configPath = flag.String("notifier.config", "", "Path to configuration file for notifiers")
|
||||
suppressDuplicateTargetErrors = flag.Bool("notifier.suppressDuplicateTargetErrors", false, "Whether to suppress 'duplicate target' errors during discovery")
|
||||
|
||||
addrs = flagutil.NewArrayString("notifier.url", "Prometheus alertmanager URL, e.g. http://127.0.0.1:9093")
|
||||
addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
|
||||
"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
|
||||
@@ -58,7 +58,6 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
Labels: cfg.Labels,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
state: newRuleState(),
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
@@ -67,6 +66,12 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
}),
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
rr.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
@@ -212,6 +217,7 @@ func (rr *RecordingRule) ToAPI() APIRule {
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.samples,
|
||||
MaxUpdates: rr.state.size(),
|
||||
Updates: rr.state.getAll(),
|
||||
|
||||
// encode as strings to avoid rounding
|
||||
|
||||
@@ -19,7 +19,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
expTS []prompbmarshal.TimeSeries
|
||||
}{
|
||||
{
|
||||
&RecordingRule{Name: "foo", state: newRuleState()},
|
||||
&RecordingRule{Name: "foo"},
|
||||
[]datasource.Metric{metricWithValueAndLabels(t, 10,
|
||||
"__name__", "bar",
|
||||
)},
|
||||
@@ -30,7 +30,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
&RecordingRule{Name: "foobarbaz", state: newRuleState()},
|
||||
&RecordingRule{Name: "foobarbaz"},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
|
||||
@@ -53,8 +53,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
{
|
||||
&RecordingRule{
|
||||
Name: "job:foo",
|
||||
state: newRuleState(),
|
||||
Name: "job:foo",
|
||||
Labels: map[string]string{
|
||||
"source": "test",
|
||||
}},
|
||||
@@ -80,6 +79,7 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(tc.metrics...)
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected Exec err: %s", err)
|
||||
@@ -198,7 +198,7 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||
}
|
||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(), Labels: map[string]string{
|
||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
}}
|
||||
var err error
|
||||
@@ -216,7 +216,7 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
rr := &RecordingRule{
|
||||
Name: "job:foo",
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
Labels: map[string]string{
|
||||
"job": "test",
|
||||
},
|
||||
|
||||
@@ -225,7 +225,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
|
||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
r := bytes.NewReader(data)
|
||||
req, err := http.NewRequest("POST", c.addr, r)
|
||||
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
|
||||
@@ -27,12 +27,13 @@ func TestClient_Push(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create client: %s", err)
|
||||
}
|
||||
r := rand.New(rand.NewSource(1))
|
||||
const rowsN = 1e4
|
||||
var sent int
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompbmarshal.TimeSeries{
|
||||
Samples: []prompbmarshal.Sample{{
|
||||
Value: rand.Float64(),
|
||||
Value: r.Float64(),
|
||||
Timestamp: time.Now().Unix(),
|
||||
}},
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ var (
|
||||
"and processing need to wait for previous rule results to be persisted by remote storage before evaluating the next rule."+
|
||||
"Keep it equal or bigger than -remoteWrite.flushInterval.")
|
||||
replayMaxDatapoints = flag.Int("replay.maxDatapointsPerQuery", 1e3,
|
||||
"Max number of data points expected in one request. The higher the value, the less requests will be made during replay.")
|
||||
"Max number of data points expected in one request. It affects the max time range for every `/query_range` request during the replay. The higher the value, the less requests will be made during replay.")
|
||||
replayRuleRetryAttempts = flag.Int("replay.ruleRetryAttempts", 5,
|
||||
"Defines how many retries to make before giving up on rule if request for it returns an error.")
|
||||
disableProgressBar = flag.Bool("replay.disableProgressBar", false, "Whether to disable rendering progress bars during the replay. "+
|
||||
|
||||
@@ -57,11 +57,12 @@ type ruleStateEntry struct {
|
||||
curl string
|
||||
}
|
||||
|
||||
const defaultStateEntriesLimit = 20
|
||||
|
||||
func newRuleState() *ruleState {
|
||||
func newRuleState(size int) *ruleState {
|
||||
if size < 1 {
|
||||
size = 1
|
||||
}
|
||||
return &ruleState{
|
||||
entries: make([]ruleStateEntry, defaultStateEntriesLimit),
|
||||
entries: make([]ruleStateEntry, size),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,6 +72,12 @@ func (s *ruleState) getLast() ruleStateEntry {
|
||||
return s.entries[s.cur]
|
||||
}
|
||||
|
||||
func (s *ruleState) size() int {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
return len(s.entries)
|
||||
}
|
||||
|
||||
func (s *ruleState) getAll() []ruleStateEntry {
|
||||
entries := make([]ruleStateEntry, 0)
|
||||
|
||||
|
||||
@@ -6,8 +6,27 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRule_stateDisabled(t *testing.T) {
|
||||
state := newRuleState(-1)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
}
|
||||
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
|
||||
if len(state.getAll()) != 1 {
|
||||
// state should store at least one update at any circumstances
|
||||
t.Fatalf("expected for state to have %d entries; got %d",
|
||||
1, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
func TestRule_state(t *testing.T) {
|
||||
state := newRuleState()
|
||||
stateEntriesN := 20
|
||||
state := newRuleState(stateEntriesN)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
@@ -39,7 +58,7 @@ func TestRule_state(t *testing.T) {
|
||||
}
|
||||
|
||||
var last time.Time
|
||||
for i := 0; i < defaultStateEntriesLimit*2; i++ {
|
||||
for i := 0; i < stateEntriesN*2; i++ {
|
||||
last = time.Now()
|
||||
state.add(ruleStateEntry{at: last})
|
||||
}
|
||||
@@ -50,9 +69,9 @@ func TestRule_state(t *testing.T) {
|
||||
e.at, last)
|
||||
}
|
||||
|
||||
if len(state.getAll()) != defaultStateEntriesLimit {
|
||||
if len(state.getAll()) != stateEntriesN {
|
||||
t.Fatalf("expected for state to have %d entries only; got %d",
|
||||
defaultStateEntriesLimit, len(state.getAll()),
|
||||
stateEntriesN, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -61,7 +80,7 @@ func TestRule_state(t *testing.T) {
|
||||
// execution of state updates.
|
||||
// Should be executed with -race flag
|
||||
func TestRule_stateConcurrent(t *testing.T) {
|
||||
state := newRuleState()
|
||||
state := newRuleState(20)
|
||||
|
||||
const workers = 50
|
||||
const iterations = 100
|
||||
|
||||
@@ -27,11 +27,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
textTpl "text/template"
|
||||
"time"
|
||||
|
||||
textTpl "text/template"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/formatutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
@@ -225,7 +225,7 @@ func templateFuncs() textTpl.FuncMap {
|
||||
"toLower": strings.ToLower,
|
||||
|
||||
// crlfEscape replaces '\n' and '\r' chars with `\\n` and `\\r`.
|
||||
// This funcion is deprectated.
|
||||
// This function is deprecated.
|
||||
//
|
||||
// It is better to use quotesEscape, jsonEscape, queryEscape or pathEscape instead -
|
||||
// these functions properly escape `\n` and `\r` chars according to their purpose.
|
||||
@@ -350,15 +350,7 @@ func templateFuncs() textTpl.FuncMap {
|
||||
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return fmt.Sprintf("%.4g", v), nil
|
||||
}
|
||||
prefix := ""
|
||||
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
|
||||
if math.Abs(v) < 1024 {
|
||||
break
|
||||
}
|
||||
prefix = p
|
||||
v /= 1024
|
||||
}
|
||||
return fmt.Sprintf("%.4g%s", v, prefix), nil
|
||||
return formatutil.HumanizeBytes(v), nil
|
||||
},
|
||||
|
||||
// humanizeDuration converts given seconds to a human-readable duration
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package templates
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"testing"
|
||||
textTpl "text/template"
|
||||
@@ -50,6 +51,31 @@ func TestTemplateFuncs(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatalf("unexpected mismatch")
|
||||
}
|
||||
|
||||
formatting := func(funcName string, p interface{}, resultExpected string) {
|
||||
t.Helper()
|
||||
v := funcs[funcName]
|
||||
fLocal := v.(func(s interface{}) (string, error))
|
||||
result, err := fLocal(p)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for %s(%f): %s", funcName, p, err)
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %s(%f); got\n%s\nwant\n%s", funcName, p, result, resultExpected)
|
||||
}
|
||||
}
|
||||
formatting("humanize1024", float64(0), "0")
|
||||
formatting("humanize1024", math.Inf(0), "+Inf")
|
||||
formatting("humanize1024", math.NaN(), "NaN")
|
||||
formatting("humanize1024", float64(127087), "124.1ki")
|
||||
formatting("humanize1024", float64(130137088), "124.1Mi")
|
||||
formatting("humanize1024", float64(133260378112), "124.1Gi")
|
||||
formatting("humanize1024", float64(136458627186688), "124.1Ti")
|
||||
formatting("humanize1024", float64(139733634239168512), "124.1Pi")
|
||||
formatting("humanize1024", float64(143087241460908556288), "124.1Ei")
|
||||
formatting("humanize1024", float64(146521335255970361638912), "124.1Zi")
|
||||
formatting("humanize1024", float64(150037847302113650318245888), "124.1Yi")
|
||||
formatting("humanize1024", float64(153638755637364377925883789312), "1.271e+05Yi")
|
||||
}
|
||||
|
||||
func mkTemplate(current, replacement interface{}) textTemplate {
|
||||
|
||||
@@ -57,7 +57,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
switch r.URL.Path {
|
||||
case "/", "/vmalert", "/vmalert/":
|
||||
if r.Method != "GET" {
|
||||
if r.Method != http.MethodGet {
|
||||
httpserver.Errorf(w, r, "path %q supports only GET method", r.URL.Path)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -40,9 +40,9 @@
|
||||
for _, g := range groups {
|
||||
for _, r := range g.Rules {
|
||||
if r.LastError != "" {
|
||||
rNotOk[g.Name]++
|
||||
rNotOk[g.ID]++
|
||||
} else {
|
||||
rOk[g.Name]++
|
||||
rOk[g.ID]++
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -50,11 +50,11 @@
|
||||
<a class="btn btn-primary" role="button" onclick="collapseAll()">Collapse All</a>
|
||||
<a class="btn btn-primary" role="button" onclick="expandAll()">Expand All</a>
|
||||
{% for _, g := range groups %}
|
||||
<div class="group-heading{% if rNotOk[g.Name] > 0 %} alert-danger{% endif %}" data-bs-target="rules-{%s g.ID %}">
|
||||
<div class="group-heading{% if rNotOk[g.ID] > 0 %} alert-danger{% endif %}" data-bs-target="rules-{%s g.ID %}">
|
||||
<span class="anchor" id="group-{%s g.ID %}"></span>
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s)</a>
|
||||
{% if rNotOk[g.Name] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d rNotOk[g.Name] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules withs status Ok">{%d rOk[g.Name] %}</span>
|
||||
{% if rNotOk[g.ID] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d rNotOk[g.ID] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules withs status Ok">{%d rOk[g.ID] %}</span>
|
||||
<p class="fs-6 fw-lighter">{%s g.File %}</p>
|
||||
{% if len(g.Params) > 0 %}
|
||||
<div class="fs-6 fw-lighter">Extra params
|
||||
@@ -427,6 +427,16 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Debug
|
||||
</div>
|
||||
<div class="col">
|
||||
{%v rule.Debug %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
@@ -440,7 +450,7 @@
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div class="display-6 pb-3">Last {%d len(rule.Updates) %} updates</span>:</div>
|
||||
<div class="display-6 pb-3">Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates</span>:</div>
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
|
||||
@@ -171,9 +171,9 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []APIGr
|
||||
for _, g := range groups {
|
||||
for _, r := range g.Rules {
|
||||
if r.LastError != "" {
|
||||
rNotOk[g.Name]++
|
||||
rNotOk[g.ID]++
|
||||
} else {
|
||||
rOk[g.Name]++
|
||||
rOk[g.ID]++
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -189,7 +189,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []APIGr
|
||||
qw422016.N().S(`
|
||||
<div class="group-heading`)
|
||||
//line app/vmalert/web.qtpl:53
|
||||
if rNotOk[g.Name] > 0 {
|
||||
if rNotOk[g.ID] > 0 {
|
||||
//line app/vmalert/web.qtpl:53
|
||||
qw422016.N().S(` alert-danger`)
|
||||
//line app/vmalert/web.qtpl:53
|
||||
@@ -230,11 +230,11 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []APIGr
|
||||
qw422016.N().S(`s)</a>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:56
|
||||
if rNotOk[g.Name] > 0 {
|
||||
if rNotOk[g.ID] > 0 {
|
||||
//line app/vmalert/web.qtpl:56
|
||||
qw422016.N().S(`<span class="badge bg-danger" title="Number of rules with status Error">`)
|
||||
//line app/vmalert/web.qtpl:56
|
||||
qw422016.N().D(rNotOk[g.Name])
|
||||
qw422016.N().D(rNotOk[g.ID])
|
||||
//line app/vmalert/web.qtpl:56
|
||||
qw422016.N().S(`</span> `)
|
||||
//line app/vmalert/web.qtpl:56
|
||||
@@ -243,7 +243,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []APIGr
|
||||
qw422016.N().S(`
|
||||
<span class="badge bg-success" title="Number of rules withs status Ok">`)
|
||||
//line app/vmalert/web.qtpl:57
|
||||
qw422016.N().D(rOk[g.Name])
|
||||
qw422016.N().D(rOk[g.ID])
|
||||
//line app/vmalert/web.qtpl:57
|
||||
qw422016.N().S(`</span>
|
||||
<p class="fs-6 fw-lighter">`)
|
||||
@@ -1313,10 +1313,24 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Debug
|
||||
</div>
|
||||
<div class="col">
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:436
|
||||
qw422016.E().V(rule.Debug)
|
||||
//line app/vmalert/web.qtpl:436
|
||||
qw422016.N().S(`
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:430
|
||||
//line app/vmalert/web.qtpl:440
|
||||
}
|
||||
//line app/vmalert/web.qtpl:430
|
||||
//line app/vmalert/web.qtpl:440
|
||||
qw422016.N().S(`
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
@@ -1325,17 +1339,17 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||
</div>
|
||||
<div class="col">
|
||||
<a target="_blank" href="`)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.E().S(prefix)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.N().S(`groups#group-`)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.E().S(rule.GroupID)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.N().S(`">`)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.E().S(rule.GroupID)
|
||||
//line app/vmalert/web.qtpl:437
|
||||
//line app/vmalert/web.qtpl:447
|
||||
qw422016.N().S(`</a>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1343,9 +1357,13 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||
|
||||
<br>
|
||||
<div class="display-6 pb-3">Last `)
|
||||
//line app/vmalert/web.qtpl:443
|
||||
//line app/vmalert/web.qtpl:453
|
||||
qw422016.N().D(len(rule.Updates))
|
||||
//line app/vmalert/web.qtpl:443
|
||||
//line app/vmalert/web.qtpl:453
|
||||
qw422016.N().S(`/`)
|
||||
//line app/vmalert/web.qtpl:453
|
||||
qw422016.N().D(rule.MaxUpdates)
|
||||
//line app/vmalert/web.qtpl:453
|
||||
qw422016.N().S(` updates</span>:</div>
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
@@ -1360,201 +1378,201 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||
<tbody>
|
||||
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:456
|
||||
//line app/vmalert/web.qtpl:466
|
||||
for _, u := range rule.Updates {
|
||||
//line app/vmalert/web.qtpl:456
|
||||
//line app/vmalert/web.qtpl:466
|
||||
qw422016.N().S(`
|
||||
<tr`)
|
||||
//line app/vmalert/web.qtpl:457
|
||||
//line app/vmalert/web.qtpl:467
|
||||
if u.err != nil {
|
||||
//line app/vmalert/web.qtpl:457
|
||||
//line app/vmalert/web.qtpl:467
|
||||
qw422016.N().S(` class="alert-danger"`)
|
||||
//line app/vmalert/web.qtpl:457
|
||||
//line app/vmalert/web.qtpl:467
|
||||
}
|
||||
//line app/vmalert/web.qtpl:457
|
||||
//line app/vmalert/web.qtpl:467
|
||||
qw422016.N().S(`>
|
||||
<td>
|
||||
<span class="badge bg-primary rounded-pill me-3" title="Updated at">`)
|
||||
//line app/vmalert/web.qtpl:459
|
||||
//line app/vmalert/web.qtpl:469
|
||||
qw422016.E().S(u.time.Format(time.RFC3339))
|
||||
//line app/vmalert/web.qtpl:459
|
||||
//line app/vmalert/web.qtpl:469
|
||||
qw422016.N().S(`</span>
|
||||
</td>
|
||||
<td class="text-center" wi>`)
|
||||
//line app/vmalert/web.qtpl:461
|
||||
//line app/vmalert/web.qtpl:471
|
||||
qw422016.N().D(u.samples)
|
||||
//line app/vmalert/web.qtpl:461
|
||||
//line app/vmalert/web.qtpl:471
|
||||
qw422016.N().S(`</td>
|
||||
<td class="text-center">`)
|
||||
//line app/vmalert/web.qtpl:462
|
||||
//line app/vmalert/web.qtpl:472
|
||||
qw422016.N().FPrec(u.duration.Seconds(), 3)
|
||||
//line app/vmalert/web.qtpl:462
|
||||
//line app/vmalert/web.qtpl:472
|
||||
qw422016.N().S(`s</td>
|
||||
<td class="text-center">`)
|
||||
//line app/vmalert/web.qtpl:463
|
||||
//line app/vmalert/web.qtpl:473
|
||||
qw422016.E().S(u.at.Format(time.RFC3339))
|
||||
//line app/vmalert/web.qtpl:463
|
||||
//line app/vmalert/web.qtpl:473
|
||||
qw422016.N().S(`</td>
|
||||
<td>
|
||||
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">`)
|
||||
//line app/vmalert/web.qtpl:465
|
||||
//line app/vmalert/web.qtpl:475
|
||||
qw422016.E().S(u.curl)
|
||||
//line app/vmalert/web.qtpl:465
|
||||
//line app/vmalert/web.qtpl:475
|
||||
qw422016.N().S(`</textarea>
|
||||
</td>
|
||||
</tr>
|
||||
</li>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:469
|
||||
//line app/vmalert/web.qtpl:479
|
||||
if u.err != nil {
|
||||
//line app/vmalert/web.qtpl:469
|
||||
//line app/vmalert/web.qtpl:479
|
||||
qw422016.N().S(`
|
||||
<tr`)
|
||||
//line app/vmalert/web.qtpl:470
|
||||
//line app/vmalert/web.qtpl:480
|
||||
if u.err != nil {
|
||||
//line app/vmalert/web.qtpl:470
|
||||
//line app/vmalert/web.qtpl:480
|
||||
qw422016.N().S(` class="alert-danger"`)
|
||||
//line app/vmalert/web.qtpl:470
|
||||
//line app/vmalert/web.qtpl:480
|
||||
}
|
||||
//line app/vmalert/web.qtpl:470
|
||||
//line app/vmalert/web.qtpl:480
|
||||
qw422016.N().S(`>
|
||||
<td colspan="5">
|
||||
<span class="alert-danger">`)
|
||||
//line app/vmalert/web.qtpl:472
|
||||
//line app/vmalert/web.qtpl:482
|
||||
qw422016.E().V(u.err)
|
||||
//line app/vmalert/web.qtpl:472
|
||||
//line app/vmalert/web.qtpl:482
|
||||
qw422016.N().S(`</span>
|
||||
</td>
|
||||
</tr>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:475
|
||||
//line app/vmalert/web.qtpl:485
|
||||
}
|
||||
//line app/vmalert/web.qtpl:475
|
||||
//line app/vmalert/web.qtpl:485
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:476
|
||||
//line app/vmalert/web.qtpl:486
|
||||
}
|
||||
//line app/vmalert/web.qtpl:476
|
||||
//line app/vmalert/web.qtpl:486
|
||||
qw422016.N().S(`
|
||||
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:478
|
||||
//line app/vmalert/web.qtpl:488
|
||||
tpl.StreamFooter(qw422016, r)
|
||||
//line app/vmalert/web.qtpl:478
|
||||
//line app/vmalert/web.qtpl:488
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule APIRule) {
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
StreamRuleDetails(qw422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
func RuleDetails(r *http.Request, rule APIRule) string {
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
WriteRuleDetails(qb422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
return qs422016
|
||||
//line app/vmalert/web.qtpl:479
|
||||
//line app/vmalert/web.qtpl:489
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:483
|
||||
//line app/vmalert/web.qtpl:493
|
||||
func streambadgeState(qw422016 *qt422016.Writer, state string) {
|
||||
//line app/vmalert/web.qtpl:483
|
||||
//line app/vmalert/web.qtpl:493
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:485
|
||||
//line app/vmalert/web.qtpl:495
|
||||
badgeClass := "bg-warning text-dark"
|
||||
if state == "firing" {
|
||||
badgeClass = "bg-danger"
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:489
|
||||
//line app/vmalert/web.qtpl:499
|
||||
qw422016.N().S(`
|
||||
<span class="badge `)
|
||||
//line app/vmalert/web.qtpl:490
|
||||
//line app/vmalert/web.qtpl:500
|
||||
qw422016.E().S(badgeClass)
|
||||
//line app/vmalert/web.qtpl:490
|
||||
//line app/vmalert/web.qtpl:500
|
||||
qw422016.N().S(`">`)
|
||||
//line app/vmalert/web.qtpl:490
|
||||
//line app/vmalert/web.qtpl:500
|
||||
qw422016.E().S(state)
|
||||
//line app/vmalert/web.qtpl:490
|
||||
//line app/vmalert/web.qtpl:500
|
||||
qw422016.N().S(`</span>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
func writebadgeState(qq422016 qtio422016.Writer, state string) {
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
streambadgeState(qw422016, state)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
func badgeState(state string) string {
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
writebadgeState(qb422016, state)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
return qs422016
|
||||
//line app/vmalert/web.qtpl:491
|
||||
//line app/vmalert/web.qtpl:501
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:493
|
||||
//line app/vmalert/web.qtpl:503
|
||||
func streambadgeRestored(qw422016 *qt422016.Writer) {
|
||||
//line app/vmalert/web.qtpl:493
|
||||
//line app/vmalert/web.qtpl:503
|
||||
qw422016.N().S(`
|
||||
<span class="badge bg-warning text-dark" title="Alert state was restored after the service restart from remote storage">restored</span>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
func writebadgeRestored(qq422016 qtio422016.Writer) {
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
streambadgeRestored(qw422016)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
func badgeRestored() string {
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
writebadgeRestored(qb422016)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
return qs422016
|
||||
//line app/vmalert/web.qtpl:495
|
||||
//line app/vmalert/web.qtpl:505
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ func TestHandler(t *testing.T) {
|
||||
alerts: map[uint64]*notifier.Alert{
|
||||
0: {State: notifier.StateFiring},
|
||||
},
|
||||
state: newRuleState(),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
g := &Group{
|
||||
Name: "group",
|
||||
|
||||
@@ -113,16 +113,20 @@ type APIRule struct {
|
||||
|
||||
// Additional fields
|
||||
|
||||
// Type of the rule: recording or alerting
|
||||
// DatasourceType of the rule: prometheus or graphite
|
||||
DatasourceType string `json:"datasourceType"`
|
||||
LastSamples int `json:"lastSamples"`
|
||||
// ID is a unique Alert's ID within a group
|
||||
ID string `json:"id"`
|
||||
// GroupID is a unique Group's ID
|
||||
GroupID string `json:"group_id"`
|
||||
// Debug shows whether debug mode is enabled
|
||||
Debug bool `json:"debug"`
|
||||
|
||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||
MaxUpdates int `json:"max_updates_entries"`
|
||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||
Updates []ruleStateEntry `json:"updates"`
|
||||
Updates []ruleStateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
|
||||
@@ -28,7 +28,36 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co
|
||||
|
||||
## Load balancing
|
||||
|
||||
Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. In the latter case `vmauth` balances load among the configured urls in a round-robin manner. This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
|
||||
In the latter case `vmauth` balances load among the configured urls in a least-loaded round-robin manner.
|
||||
`vmauth` retries failing `GET` requests across the configured list of urls.
|
||||
This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes
|
||||
in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
|
||||
## Concurrency limiting
|
||||
|
||||
`vmauth` limits the number of concurrent requests it can proxy according to the following command-line flags:
|
||||
|
||||
- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
|
||||
- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
|
||||
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user
|
||||
with the `max_concurrent_requests` option - see [auth config example](#auth-config).
|
||||
|
||||
`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.
|
||||
|
||||
The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
|
||||
|
||||
- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
|
||||
It is set via `-maxConcurrentRequests` command-line flag.
|
||||
- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
|
||||
- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the global concurrency limit has been reached.
|
||||
- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the concurrency limit has been reached for the given `username`.
|
||||
|
||||
|
||||
## Auth config
|
||||
|
||||
@@ -55,26 +84,27 @@ users:
|
||||
headers:
|
||||
- "X-Scope-OrgID: foobar"
|
||||
|
||||
# The user for querying local single-node VictoriaMetrics.
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be proxied to http://localhost:8428 .
|
||||
# are proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
#
|
||||
# The given user can send at most 10 concurrent requests according to the provided max_concurrent_requests.
|
||||
# Excess concurrent requests are rejected with 429 HTTP status code.
|
||||
# See also -maxConcurrentPerUserRequests and -maxConcurrentRequests command-line flags.
|
||||
- username: "local-single-node"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428"
|
||||
max_concurrent_requests: 10
|
||||
|
||||
# The user for querying local single-node VictoriaMetrics with extra_label team=dev.
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# are proxied to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
|
||||
- username: "local-single-node"
|
||||
- username: "local-single-node2"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# The user for querying account 123 in VictoriaMetrics cluster
|
||||
# See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
|
||||
# - http://vmselect1:8481/select/123/prometheus/api/v1/query
|
||||
# - http://vmselect2:8481/select/123/prometheus/api/v1/query
|
||||
@@ -84,10 +114,8 @@ users:
|
||||
- "http://vmselect1:8481/select/123/prometheus"
|
||||
- "http://vmselect2:8481/select/123/prometheus"
|
||||
|
||||
# The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
|
||||
# See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus
|
||||
# are load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/write is proxied to the following urls in a round-robin manner:
|
||||
# - http://vminsert1:8480/insert/42/prometheus/api/v1/write
|
||||
# - http://vminsert2:8480/insert/42/prometheus/api/v1/write
|
||||
@@ -261,7 +289,11 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections (default ":8427")
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8427")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
|
||||
-logInvalidAuthTokens
|
||||
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
|
||||
-loggerDisableTimestamps
|
||||
@@ -270,6 +302,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -278,8 +312,12 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentPerUserRequests int
|
||||
The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option in per-user config (default 300)
|
||||
-maxConcurrentRequests int
|
||||
The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
|
||||
-maxIdleConnsPerBackend int
|
||||
The maximum number of idle connections vmauth can open per each backend host (default 100)
|
||||
The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
@@ -299,6 +337,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-reloadAuthKey string
|
||||
Auth key for /-/reload http endpoint. It must be passed as authKey=...
|
||||
-responseTimeout duration
|
||||
The timeout for receiving a response from backend (default 5m0s)
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -32,17 +33,43 @@ type AuthConfig struct {
|
||||
|
||||
// UserInfo is user information read from authConfigPath
|
||||
type UserInfo struct {
|
||||
Name string `yaml:"name,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
URLMap []URLMap `yaml:"url_map,omitempty"`
|
||||
Headers []Header `yaml:"headers,omitempty"`
|
||||
Name string `yaml:"name,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
URLMaps []URLMap `yaml:"url_map,omitempty"`
|
||||
Headers []Header `yaml:"headers,omitempty"`
|
||||
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
|
||||
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitReached *metrics.Counter
|
||||
|
||||
requests *metrics.Counter
|
||||
}
|
||||
|
||||
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
}
|
||||
|
||||
func (ui *UserInfo) endConcurrencyLimit() {
|
||||
<-ui.concurrencyLimitCh
|
||||
}
|
||||
|
||||
func (ui *UserInfo) getMaxConcurrentRequests() int {
|
||||
mcr := ui.MaxConcurrentRequests
|
||||
if mcr <= 0 {
|
||||
mcr = *maxConcurrentPerUserRequests
|
||||
}
|
||||
return mcr
|
||||
}
|
||||
|
||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||
type Header struct {
|
||||
Name string
|
||||
@@ -83,16 +110,77 @@ type SrcPath struct {
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
// URLPrefix represents pased `url_prefix`
|
||||
// URLPrefix represents passed `url_prefix`
|
||||
type URLPrefix struct {
|
||||
n uint32
|
||||
urls []*url.URL
|
||||
n uint32
|
||||
bus []*backendURL
|
||||
}
|
||||
|
||||
func (up *URLPrefix) getNextURL() *url.URL {
|
||||
// backendURL is a single backend url together with its runtime state.
type backendURL struct {
	// brokenDeadline is the timestamp until which the backend is treated as broken.
	// It is accessed via 64-bit atomic operations, so it is kept as the first field
	// in order to stay 64-bit aligned on 32-bit platforms.
	brokenDeadline uint64

	// concurrentRequests is the number of requests currently accounted for the backend.
	// It is modified via atomic operations.
	concurrentRequests int32

	// url is the parsed backend url.
	url *url.URL
}
|
||||
|
||||
func (bu *backendURL) isBroken() bool {
|
||||
ct := fasttime.UnixTimestamp()
|
||||
return ct < atomic.LoadUint64(&bu.brokenDeadline)
|
||||
}
|
||||
|
||||
func (bu *backendURL) setBroken() {
|
||||
deadline := fasttime.UnixTimestamp() + 3
|
||||
atomic.StoreUint64(&bu.brokenDeadline, deadline)
|
||||
}
|
||||
|
||||
func (bu *backendURL) put() {
|
||||
atomic.AddInt32(&bu.concurrentRequests, -1)
|
||||
}
|
||||
|
||||
func (up *URLPrefix) getBackendsCount() int {
|
||||
return len(up.bus)
|
||||
}
|
||||
|
||||
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
||||
bus := up.bus
|
||||
if len(bus) == 1 {
|
||||
// Fast path - return the only backend url.
|
||||
bu := bus[0]
|
||||
atomic.AddInt32(&bu.concurrentRequests, 1)
|
||||
return bu
|
||||
}
|
||||
|
||||
// Slow path - select other backend urls.
|
||||
n := atomic.AddUint32(&up.n, 1)
|
||||
idx := n % uint32(len(up.urls))
|
||||
return up.urls[idx]
|
||||
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if atomic.CompareAndSwapInt32(&bu.concurrentRequests, 0, 1) {
|
||||
// Fast path - return the backend with zero concurrently executed requests.
|
||||
return bu
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMin := bus[n%uint32(len(bus))]
|
||||
minRequests := atomic.LoadInt32(&buMin.concurrentRequests)
|
||||
for _, bu := range bus {
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if n := atomic.LoadInt32(&bu.concurrentRequests); n < minRequests {
|
||||
buMin = bu
|
||||
minRequests = n
|
||||
}
|
||||
}
|
||||
atomic.AddInt32(&buMin.concurrentRequests, 1)
|
||||
return buMin
|
||||
}
|
||||
|
||||
// UnmarshalYAML unmarshals up from yaml.
|
||||
@@ -121,31 +209,33 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
|
||||
default:
|
||||
return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v)
|
||||
}
|
||||
pus := make([]*url.URL, len(urls))
|
||||
bus := make([]*backendURL, len(urls))
|
||||
for i, u := range urls {
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal %q into url: %w", u, err)
|
||||
}
|
||||
pus[i] = pu
|
||||
bus[i] = &backendURL{
|
||||
url: pu,
|
||||
}
|
||||
}
|
||||
up.urls = pus
|
||||
up.bus = bus
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML marshals up to yaml.
|
||||
func (up *URLPrefix) MarshalYAML() (interface{}, error) {
|
||||
var b []byte
|
||||
if len(up.urls) == 1 {
|
||||
u := up.urls[0].String()
|
||||
if len(up.bus) == 1 {
|
||||
u := up.bus[0].url.String()
|
||||
b = strconv.AppendQuote(b, u)
|
||||
return string(b), nil
|
||||
}
|
||||
b = append(b, '[')
|
||||
for i, pu := range up.urls {
|
||||
u := pu.String()
|
||||
for i, bu := range up.bus {
|
||||
u := bu.url.String()
|
||||
b = strconv.AppendQuote(b, u)
|
||||
if i+1 < len(up.urls) {
|
||||
if i+1 < len(up.bus) {
|
||||
b = append(b, ',')
|
||||
}
|
||||
}
|
||||
@@ -284,7 +374,7 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for _, e := range ui.URLMap {
|
||||
for _, e := range ui.URLMaps {
|
||||
if len(e.SrcPaths) == 0 {
|
||||
return nil, fmt.Errorf("missing `src_paths` in `url_map`")
|
||||
}
|
||||
@@ -295,32 +385,47 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if len(ui.URLMap) == 0 && ui.URLPrefix == nil {
|
||||
if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
|
||||
return nil, fmt.Errorf("missing `url_prefix`")
|
||||
}
|
||||
name := ui.name()
|
||||
if ui.BearerToken != "" {
|
||||
name := "bearer_token"
|
||||
if ui.Name != "" {
|
||||
name = ui.Name
|
||||
}
|
||||
if ui.Password != "" {
|
||||
return nil, fmt.Errorf("password shouldn't be set for bearer_token %q", ui.BearerToken)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, name))
|
||||
}
|
||||
if ui.Username != "" {
|
||||
name := ui.Username
|
||||
if ui.Name != "" {
|
||||
name = ui.Name
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, name))
|
||||
}
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_concurrent_requests_limit_reached_total{username=%q}`, name))
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_capacity{username=%q}`, name), func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
byAuthToken[at1] = ui
|
||||
byAuthToken[at2] = ui
|
||||
}
|
||||
return byAuthToken, nil
|
||||
}
|
||||
|
||||
// name returns the label used to identify the user: the explicit Name if it is set,
// otherwise the Username, otherwise the fixed string "bearer_token" when a
// BearerToken is configured. An empty string is returned when none of them is set.
func (ui *UserInfo) name() string {
|
||||
if ui.Name != "" {
|
||||
return ui.Name
|
||||
}
|
||||
if ui.Username != "" {
|
||||
return ui.Username
|
||||
}
|
||||
if ui.BearerToken != "" {
|
||||
return "bearer_token"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getAuthTokens(bearerToken, username, password string) (string, string) {
|
||||
if bearerToken != "" {
|
||||
// Accept the bearerToken as Basic Auth username with empty password
|
||||
@@ -342,12 +447,12 @@ func getAuthToken(bearerToken, username, password string) string {
|
||||
}
|
||||
|
||||
func (up *URLPrefix) sanitize() error {
|
||||
for i, pu := range up.urls {
|
||||
puNew, err := sanitizeURLPrefix(pu)
|
||||
for _, bu := range up.bus {
|
||||
puNew, err := sanitizeURLPrefix(bu.url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
up.urls[i] = puNew
|
||||
bu.url = puNew
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -218,11 +218,13 @@ users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://aaa:343/bbb
|
||||
max_concurrent_requests: 5
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("", "foo", "bar"): {
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
MaxConcurrentRequests: 5,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -278,7 +280,7 @@ users:
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("foo", "", ""): {
|
||||
BearerToken: "foo",
|
||||
URLMap: []URLMap{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
@@ -304,7 +306,7 @@ users:
|
||||
},
|
||||
getAuthToken("", "foo", ""): {
|
||||
BearerToken: "foo",
|
||||
URLMap: []URLMap{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
|
||||
@@ -390,15 +392,17 @@ func mustParseURL(u string) *URLPrefix {
|
||||
}
|
||||
|
||||
func mustParseURLs(us []string) *URLPrefix {
|
||||
pus := make([]*url.URL, len(us))
|
||||
bus := make([]*backendURL, len(us))
|
||||
for i, u := range us {
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
||||
}
|
||||
pus[i] = pu
|
||||
bus[i] = &backendURL{
|
||||
url: pu,
|
||||
}
|
||||
}
|
||||
return &URLPrefix{
|
||||
urls: pus,
|
||||
bus: bus,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,26 +18,27 @@ users:
|
||||
headers:
|
||||
- "X-Scope-OrgID: foobar"
|
||||
|
||||
# The user for querying local single-node VictoriaMetrics.
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be proxied to http://localhost:8428 .
|
||||
# are proxied to http://localhost:8428 .
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
|
||||
#
|
||||
# The given user can send a maximum of 10 concurrent requests according to the provided max_concurrent_requests.
|
||||
# Excess concurrent requests are rejected with 429 HTTP status code.
|
||||
# See also -maxConcurrentPerUserRequests and -maxConcurrentRequests command-line flags.
|
||||
- username: "local-single-node"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428"
|
||||
max_concurrent_requests: 10
|
||||
|
||||
# The user for querying local single-node VictoriaMetrics with extra_label team=dev.
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be routed to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# are proxied to http://localhost:8428 with extra_label=team=dev query arg.
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
|
||||
- username: "local-single-node"
|
||||
- username: "local-single-node2"
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# The user for querying account 123 in VictoriaMetrics cluster
|
||||
# See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
|
||||
# - http://vmselect1:8481/select/123/prometheus/api/v1/query
|
||||
# - http://vmselect2:8481/select/123/prometheus/api/v1/query
|
||||
@@ -47,10 +48,8 @@ users:
|
||||
- "http://vmselect1:8481/select/123/prometheus"
|
||||
- "http://vmselect2:8481/select/123/prometheus"
|
||||
|
||||
# The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
|
||||
# See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# will be load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus
|
||||
# are load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/write is proxied to the following urls in a round-robin manner:
|
||||
# - http://vminsert1:8480/insert/42/prometheus/api/v1/write
|
||||
# - http://vminsert2:8480/insert/42/prometheus/api/v1/write
|
||||
|
||||
@@ -3,8 +3,10 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/textproto"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
@@ -12,20 +14,31 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections")
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
|
||||
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
|
||||
"See also -maxConcurrentRequests")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||
"in per-user config")
|
||||
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||
)
|
||||
|
||||
@@ -41,7 +54,7 @@ func main() {
|
||||
logger.Infof("starting vmauth at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
initAuthConfig()
|
||||
go httpserver.Serve(*httpListenAddr, requestHandler)
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
logger.Infof("started vmauth in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
@@ -60,9 +73,7 @@ func main() {
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
switch r.URL.Path {
|
||||
case "/-/reload":
|
||||
authKey := r.FormValue("authKey")
|
||||
if authKey != *reloadAuthKey {
|
||||
httpserver.Errorf(w, r, "invalid authKey %q. It must match the value from -reloadAuthKey command line flag", authKey)
|
||||
if !httpserver.CheckAuthFlag(w, r, *reloadAuthKey, "reloadAuthKey") {
|
||||
return true
|
||||
}
|
||||
configReloadRequests.Inc()
|
||||
@@ -81,44 +92,175 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
// See https://docs.influxdata.com/influxdb/v2.0/api/
|
||||
authToken = strings.Replace(authToken, "Token", "Bearer", 1)
|
||||
}
|
||||
|
||||
ac := authConfig.Load().(map[string]*UserInfo)
|
||||
ui := ac[authToken]
|
||||
if ui == nil {
|
||||
invalidAuthTokenRequests.Inc()
|
||||
err := fmt.Errorf("cannot find the provided auth token %q in config", authToken)
|
||||
if *logInvalidAuthTokens {
|
||||
httpserver.Errorf(w, r, "cannot find the provided auth token %q in config", authToken)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
} else {
|
||||
errStr := fmt.Sprintf("cannot find the provided auth token %q in config", authToken)
|
||||
http.Error(w, errStr, http.StatusBadRequest)
|
||||
http.Error(w, err.Error(), http.StatusUnauthorized)
|
||||
}
|
||||
return true
|
||||
}
|
||||
ui.requests.Inc()
|
||||
targetURL, headers, err := createTargetURL(ui, r.URL)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
|
||||
|
||||
// Limit the concurrency of requests to backends
|
||||
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
return true
|
||||
}
|
||||
default:
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return true
|
||||
}
|
||||
r.Header.Set("vm-target-url", targetURL.String())
|
||||
for _, h := range headers {
|
||||
r.Header.Set(h.Name, h.Value)
|
||||
}
|
||||
proxyRequest(w, r)
|
||||
processRequest(w, r, ui)
|
||||
ui.endConcurrencyLimit()
|
||||
<-concurrencyLimitCh
|
||||
return true
|
||||
}
|
||||
|
||||
func proxyRequest(w http.ResponseWriter, r *http.Request) {
|
||||
defer func() {
|
||||
err := recover()
|
||||
if err == nil || err == http.ErrAbortHandler {
|
||||
// Suppress http.ErrAbortHandler panic.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1353
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, headers, err := ui.getURLPrefixAndHeaders(u)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
|
||||
return
|
||||
}
|
||||
maxAttempts := up.getBackendsCount()
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
bu := up.getLeastLoadedBackendURL()
|
||||
targetURL := mergeURLs(bu.url, u)
|
||||
ok := tryProcessingRequest(w, r, targetURL, headers)
|
||||
bu.put()
|
||||
if ok {
|
||||
return
|
||||
}
|
||||
// Forward other panics to the caller.
|
||||
panic(err)
|
||||
}()
|
||||
getReverseProxy().ServeHTTP(w, r)
|
||||
bu.setBroken()
|
||||
}
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("all the backends for the user %q are unavailable", ui.name()),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
|
||||
// tryProcessingRequest sends a sanitized copy of r to targetURL with the given
// headers set on it and copies the backend response (headers, status code, body) to w.
//
// It returns false only when the round trip fails for a request whose method is
// neither POST nor PUT; nothing has been written to w in this case.
// It returns true in every other case: after the response has been proxied
// (even if copying the body failed), or after a failed POST/PUT has been
// answered with 503 Service Unavailable.
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, headers []Header) bool {
|
||||
// This code has been copied from net/http/httputil/reverseproxy.go
|
||||
req := sanitizeRequestHeaders(r)
|
||||
req.URL = targetURL
|
||||
for _, h := range headers {
|
||||
req.Header.Set(h.Name, h.Value)
|
||||
}
|
||||
transportOnce.Do(transportInit)
|
||||
res, err := transport.RoundTrip(req)
|
||||
if err != nil {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
if r.Method == http.MethodPost || r.Method == http.MethodPut {
|
||||
// It is impossible to retry POST and PUT requests,
|
||||
// since we already proxied the request body to the backend.
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot proxy the request to %q: %w", targetURL, err),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
// The error is only logged here; no response is written to w.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying the request to %q: %s", remoteAddr, requestURI, targetURL, err)
|
||||
return false
|
||||
}
|
||||
removeHopHeaders(res.Header)
|
||||
copyHeader(w.Header(), res.Header)
|
||||
w.WriteHeader(res.StatusCode)
|
||||
|
||||
// Copy the response body through a pooled buffer resized to 16KiB.
copyBuf := copyBufPool.Get()
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
|
||||
copyBufPool.Put(copyBuf)
|
||||
_ = res.Body.Close()
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
// The status code and headers have been already written to w at this point.
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var copyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
// copyHeader appends every value of every header in src to dst.
// Values already present in dst are kept, since Add is used instead of Set.
func copyHeader(dst, src http.Header) {
|
||||
for k, vv := range src {
|
||||
for _, v := range vv {
|
||||
dst.Add(k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sanitizeRequestHeaders returns a clone of r with hop-by-hop headers removed
// and with the client IP taken from RemoteAddr appended to the X-Forwarded-For header.
//
// X-Forwarded-For is left as is when RemoteAddr cannot be split into host and port.
func sanitizeRequestHeaders(r *http.Request) *http.Request {
|
||||
// This code has been copied from net/http/httputil/reverseproxy.go
|
||||
req := r.Clone(r.Context())
|
||||
removeHopHeaders(req.Header)
|
||||
if clientIP, _, err := net.SplitHostPort(req.RemoteAddr); err == nil {
|
||||
// If we aren't the first proxy retain prior
|
||||
// X-Forwarded-For information as a comma+space
|
||||
// separated list and fold multiple headers into one.
|
||||
prior := req.Header["X-Forwarded-For"]
|
||||
if len(prior) > 0 {
|
||||
clientIP = strings.Join(prior, ", ") + ", " + clientIP
|
||||
}
|
||||
req.Header.Set("X-Forwarded-For", clientIP)
|
||||
}
|
||||
return req
|
||||
}
|
||||
|
||||
// removeHopHeaders deletes hop-by-hop headers from h in place:
// first the headers named in the comma-separated values of the "Connection" header,
// then the fixed set of headers listed in hopHeaders.
func removeHopHeaders(h http.Header) {
|
||||
// remove hop-by-hop headers listed in the "Connection" header of h.
|
||||
// See RFC 7230, section 6.1
|
||||
for _, f := range h["Connection"] {
|
||||
for _, sf := range strings.Split(f, ",") {
|
||||
if sf = textproto.TrimString(sf); sf != "" {
|
||||
h.Del(sf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove hop-by-hop headers to the backend. Especially
|
||||
// important is "Connection" because we want a persistent
|
||||
// connection, regardless of what the client sent to us.
|
||||
for _, key := range hopHeaders {
|
||||
h.Del(key)
|
||||
}
|
||||
}
|
||||
|
||||
// Hop-by-hop headers. These are removed when sent to the backend.
|
||||
// As of RFC 7230, hop-by-hop headers are required to appear in the
|
||||
// Connection header field. These are the headers defined by the
|
||||
// obsoleted RFC 2616 (section 13.5.1) and are used for backward
|
||||
// compatibility.
|
||||
var hopHeaders = []string{
|
||||
"Connection",
|
||||
"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
|
||||
"Keep-Alive",
|
||||
"Proxy-Authenticate",
|
||||
"Proxy-Authorization",
|
||||
"Te", // canonicalized version of "TE"
|
||||
"Trailer", // not Trailers per URL above; https://www.rfc-editor.org/errata_search.php?eid=4522
|
||||
"Transfer-Encoding",
|
||||
"Upgrade",
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -128,43 +270,41 @@ var (
|
||||
)
|
||||
|
||||
var (
|
||||
reverseProxy *httputil.ReverseProxy
|
||||
reverseProxyOnce sync.Once
|
||||
transport *http.Transport
|
||||
transportOnce sync.Once
|
||||
)
|
||||
|
||||
func getReverseProxy() *httputil.ReverseProxy {
|
||||
reverseProxyOnce.Do(initReverseProxy)
|
||||
return reverseProxy
|
||||
// transportInit initializes the package-level transport from a clone of http.DefaultTransport.
//
// It reads the -responseTimeout and -maxIdleConnsPerBackend command-line flags.
// MaxIdleConns is raised to MaxIdleConnsPerHost when it is non-zero and smaller than it.
func transportInit() {
|
||||
tr := http.DefaultTransport.(*http.Transport).Clone()
|
||||
tr.ResponseHeaderTimeout = *responseTimeout
|
||||
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
|
||||
tr.DisableCompression = true
|
||||
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
|
||||
tr.ForceAttemptHTTP2 = false
|
||||
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
|
||||
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
|
||||
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
|
||||
}
|
||||
transport = tr
|
||||
}
|
||||
|
||||
// initReverseProxy must be called after flag.Parse(), since it uses command-line flags.
|
||||
func initReverseProxy() {
|
||||
reverseProxy = &httputil.ReverseProxy{
|
||||
Director: func(r *http.Request) {
|
||||
targetURL := r.Header.Get("vm-target-url")
|
||||
target, err := url.Parse(targetURL)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing targetURL=%q: %s", targetURL, err)
|
||||
}
|
||||
r.URL = target
|
||||
},
|
||||
Transport: func() *http.Transport {
|
||||
tr := http.DefaultTransport.(*http.Transport).Clone()
|
||||
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
|
||||
tr.DisableCompression = true
|
||||
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
|
||||
tr.ForceAttemptHTTP2 = false
|
||||
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
|
||||
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
|
||||
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
|
||||
}
|
||||
return tr
|
||||
}(),
|
||||
FlushInterval: time.Second,
|
||||
ErrorLog: logger.StdErrorLogger(),
|
||||
}
|
||||
var (
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitOnce sync.Once
|
||||
)
|
||||
|
||||
// concurrencyLimitInit creates concurrencyLimitCh with the capacity taken from
// the -maxConcurrentRequests flag and registers two gauges:
// vmauth_concurrent_requests_capacity reports the flag value, while
// vmauth_concurrent_requests_current reports the number of items currently held in the channel.
func concurrencyLimitInit() {
|
||||
concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests)
|
||||
_ = metrics.NewGauge("vmauth_concurrent_requests_capacity", func() float64 {
|
||||
return float64(*maxConcurrentRequests)
|
||||
})
|
||||
_ = metrics.NewGauge("vmauth_concurrent_requests_current", func() float64 {
|
||||
return float64(len(concurrencyLimitCh))
|
||||
})
|
||||
}
|
||||
|
||||
var concurrentRequestsLimitReached = metrics.NewCounter("vmauth_concurrent_requests_limit_reached_total")
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
@@ -173,3 +313,12 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
// handleConcurrencyLimitError adds a "Retry-After: 10" header to the response
// and reports err via httpserver.Errorf, wrapped with the 429 Too Many Requests status code.
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
w.Header().Add("Retry-After", "10")
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -7,11 +7,6 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (up *URLPrefix) mergeURLs(requestURI *url.URL) *url.URL {
|
||||
pu := up.getNextURL()
|
||||
return mergeURLs(pu, requestURI)
|
||||
}
|
||||
|
||||
func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
|
||||
targetURL := *uiURL
|
||||
targetURL.Path += requestURI.Path
|
||||
@@ -35,12 +30,27 @@ func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
|
||||
return &targetURL
|
||||
}
|
||||
|
||||
func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, []Header, error) {
|
||||
// getURLPrefixAndHeaders returns the URLPrefix and Headers to use for the request url u.
//
// The URLMaps entries are checked in order; the first entry with a SrcPaths item
// matching u.Path wins. If nothing matches, the user-level URLPrefix and Headers
// are returned when URLPrefix is set. Otherwise missingRouteRequests is incremented
// and an error is returned together with nil results.
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, []Header, error) {
|
||||
for _, e := range ui.URLMaps {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
return e.URLPrefix, e.Headers, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if ui.URLPrefix != nil {
|
||||
return ui.URLPrefix, ui.Headers, nil
|
||||
}
|
||||
missingRouteRequests.Inc()
|
||||
return nil, nil, fmt.Errorf("missing route for %q", u.String())
|
||||
}
|
||||
|
||||
func normalizeURL(uOrig *url.URL) *url.URL {
|
||||
u := *uOrig
|
||||
// Prevent from attacks with using `..` in r.URL.Path
|
||||
u.Path = path.Clean(u.Path)
|
||||
if !strings.HasSuffix(u.Path, "/") && strings.HasSuffix(uOrig.Path, "/") {
|
||||
// The path.Clean() removes traling slash.
|
||||
// The path.Clean() removes trailing slash.
|
||||
// Return it back if needed.
|
||||
// This should fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1752
|
||||
u.Path += "/"
|
||||
@@ -52,16 +62,5 @@ func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, []Header, error) {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1554
|
||||
u.Path = ""
|
||||
}
|
||||
for _, e := range ui.URLMap {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
return e.URLPrefix.mergeURLs(&u), e.Headers, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if ui.URLPrefix != nil {
|
||||
return ui.URLPrefix.mergeURLs(&u), ui.Headers, nil
|
||||
}
|
||||
missingRouteRequests.Inc()
|
||||
return nil, nil, fmt.Errorf("missing route for %q", u.String())
|
||||
return &u
|
||||
}
|
||||
|
||||
@@ -13,10 +13,14 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
target, headers, err := createTargetURL(ui, u)
|
||||
u = normalizeURL(u)
|
||||
up, headers, err := ui.getURLPrefixAndHeaders(u)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bu := up.getLeastLoadedBackendURL()
|
||||
target := mergeURLs(bu.url, u)
|
||||
bu.put()
|
||||
if target.String() != expectedTarget {
|
||||
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
|
||||
}
|
||||
@@ -54,7 +58,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
|
||||
// Complex routing with `url_map`
|
||||
ui := &UserInfo{
|
||||
URLMap: []URLMap{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
|
||||
@@ -86,7 +90,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
|
||||
// Complex routing regexp paths in `url_map`
|
||||
ui = &UserInfo{
|
||||
URLMap: []URLMap{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query(_range)?", "/api/v1/label/[^/]+/values"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
|
||||
@@ -119,20 +123,21 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
target, headers, err := createTargetURL(ui, u)
|
||||
u = normalizeURL(u)
|
||||
up, headers, err := ui.getURLPrefixAndHeaders(u)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if target != nil {
|
||||
t.Fatalf("unexpected target=%q; want empty string", target)
|
||||
if up != nil {
|
||||
t.Fatalf("unexpected non-empty up=%#v", up)
|
||||
}
|
||||
if headers != nil {
|
||||
t.Fatalf("unexpected headers=%q; want empty string", headers)
|
||||
t.Fatalf("unexpected non-empty headers=%q", headers)
|
||||
}
|
||||
}
|
||||
f(&UserInfo{}, "/foo/bar")
|
||||
f(&UserInfo{
|
||||
URLMap: []URLMap{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
URLPrefix: mustParseURL("http://foobar/baz"),
|
||||
|
||||
@@ -225,6 +225,8 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -41,25 +42,36 @@ func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
flagutil.RegisterSecretFlag("snapshot.createURL")
|
||||
flagutil.RegisterSecretFlag("snapshot.deleteURL")
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if len(*snapshotCreateURL) > 0 {
|
||||
// create net/url object
|
||||
createUrl, err := url.Parse(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse snapshotCreateURL: %s", err)
|
||||
}
|
||||
if len(*snapshotName) > 0 {
|
||||
logger.Fatalf("-snapshotName shouldn't be set if -snapshot.createURL is set, since snapshots are created automatically in this case")
|
||||
}
|
||||
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
|
||||
logger.Infof("Snapshot create url %s", createUrl.Redacted())
|
||||
if len(*snapshotDeleteURL) <= 0 {
|
||||
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
|
||||
if err != nil {
|
||||
logger.Fatalf("Failed to set snapshot.deleteURL flag: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("Snapshot delete url %s", *snapshotDeleteURL)
|
||||
deleteUrl, err := url.Parse(*snapshotDeleteURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse snapshotDeleteURL: %s", err)
|
||||
}
|
||||
logger.Infof("Snapshot delete url %s", deleteUrl.Redacted())
|
||||
|
||||
name, err := snapshot.Create(*snapshotCreateURL)
|
||||
name, err := snapshot.Create(createUrl.String())
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create snapshot: %s", err)
|
||||
}
|
||||
@@ -69,7 +81,7 @@ func main() {
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := snapshot.Delete(*snapshotDeleteURL, name)
|
||||
err := snapshot.Delete(deleteUrl.String(), name)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot delete snapshot: %s", err)
|
||||
}
|
||||
@@ -81,7 +93,7 @@ func main() {
|
||||
logger.Fatalf("invalid -snapshotName=%q: %s", *snapshotName, err)
|
||||
}
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, nil)
|
||||
go httpserver.Serve(*httpListenAddr, false, nil)
|
||||
|
||||
srcFS, err := newSrcFS()
|
||||
if err != nil {
|
||||
@@ -118,7 +130,7 @@ func main() {
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3
|
||||
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3, azblob
|
||||
or local filesystem. Backed up data can be restored with vmrestore.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/vmbackup.html .
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -67,7 +67,7 @@ credentials.json
|
||||
"project_id": "<project>",
|
||||
"private_key_id": "",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\-----END PRIVATE KEY-----\n",
|
||||
"client_email": “test@<project>.iam.gserviceaccount.com",
|
||||
"client_email": "test@<project>.iam.gserviceaccount.com",
|
||||
"client_id": "",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
@@ -158,7 +158,7 @@ The result on the GCS bucket. We see only 3 daily backups:
|
||||
* GET `/api/v1/backups` - returns list of backups in remote storage.
|
||||
Example output:
|
||||
```json
|
||||
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
|
||||
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
|
||||
```
|
||||
|
||||
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
|
||||
@@ -211,7 +211,7 @@ It can be changed by using flag:
|
||||
`vmbackupmanager backup list` lists backups in remote storage:
|
||||
```console
|
||||
$ ./vmbackupmanager backup list
|
||||
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
|
||||
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
|
||||
```
|
||||
|
||||
### Restore commands
|
||||
@@ -270,7 +270,15 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
|
||||
|
||||
### How to restore in Kubernetes
|
||||
|
||||
1. Enter container running `vmbackupmanager`
|
||||
1. Ensure there is an init container with `vmbackupmanager restore` in `vmstorage` or `vmsingle` pod.
|
||||
For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) deployments it is required to add:
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart: "true"
|
||||
```
|
||||
See operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api.html#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api.html#vmsinglespec).
|
||||
2. Enter container running `vmbackupmanager`
|
||||
2. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
```console
|
||||
$ /vmbackupmanager-prod backup list
|
||||
@@ -287,6 +295,43 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
|
||||
```
|
||||
4. Restart pod
|
||||
|
||||
#### Restore cluster into another cluster
|
||||
|
||||
These steps assume that [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) is used to manage `VMCluster`.
|
||||
Clusters here are referred to as `source` and `destination`.
|
||||
|
||||
1. Create a new cluster with access to *source* cluster `vmbackupmanager` storage and same number of storage nodes.
|
||||
Add the following section in order to enable restore on start (operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api.html#vmstorage)):
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart: "true"
|
||||
```
|
||||
Note: it is safe to leave this section in the cluster configuration, since it will be ignored if restore mark doesn't exist.
|
||||
> Important! Use different `-dst` for *destination* cluster to avoid overwriting backup data of the *source* cluster.
|
||||
2. Enter container running `vmbackupmanager` in *source* cluster
|
||||
2. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
```console
|
||||
$ /vmbackupmanager-prod backup list
|
||||
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
|
||||
```
|
||||
3. Use `vmbackupmanager restore create` to create restore mark at each pod of the *destination* cluster.
|
||||
Each pod in *destination* cluster should be restored from backup of respective pod in *source* cluster.
|
||||
For example: `vmstorage-destination-0` in *destination* cluster should be restored from the backup of `vmstorage-source-0` in *source* cluster.
|
||||
```console
|
||||
$ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/daily/2022-10-06
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vmbackupmanager` exports various metrics in Prometheus exposition format at `http://vmbackupmanager:8300/metrics` page. It is recommended to set up regular scraping of this page
|
||||
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/17798-victoriametrics-backupmanager/) for `vmbackupmanager` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Flags
|
||||
@@ -372,6 +417,8 @@ command-line flags:
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
|
||||
60
app/vmctl/backoff/backoff.go
Normal file
60
app/vmctl/backoff/backoff.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package backoff
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
backoffRetries = 5
|
||||
backoffFactor = 1.7
|
||||
backoffMinDuration = time.Second
|
||||
)
|
||||
|
||||
// retryableFunc describes a callback which is repeated on errors
|
||||
type retryableFunc func() error
|
||||
|
||||
// ErrBadRequest is a sentinel error: Retry returns immediately, without
// further attempts, when the callback's error matches it via errors.Is.
var ErrBadRequest = errors.New("bad request")
|
||||
|
||||
// Backoff describes object with backoff policy params
|
||||
type Backoff struct {
|
||||
retries int
|
||||
factor float64
|
||||
minDuration time.Duration
|
||||
}
|
||||
|
||||
// New initialize backoff object
|
||||
func New() *Backoff {
|
||||
return &Backoff{
|
||||
retries: backoffRetries,
|
||||
factor: backoffFactor,
|
||||
minDuration: backoffMinDuration,
|
||||
}
|
||||
}
|
||||
|
||||
// Retry process retries until all attempts are completed
|
||||
func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
var attempt uint64
|
||||
for i := 0; i < b.retries; i++ {
|
||||
// @TODO we should use context to cancel retries
|
||||
err := cb()
|
||||
if err == nil {
|
||||
return attempt, nil
|
||||
}
|
||||
if errors.Is(err, ErrBadRequest) {
|
||||
logger.Errorf("unrecoverable error: %s", err)
|
||||
return attempt, err // fail fast if not recoverable
|
||||
}
|
||||
attempt++
|
||||
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
|
||||
dur := time.Duration(backoff)
|
||||
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
|
||||
time.Sleep(time.Duration(backoff))
|
||||
}
|
||||
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
|
||||
}
|
||||
96
app/vmctl/backoff/backoff_test.go
Normal file
96
app/vmctl/backoff/backoff_test.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package backoff
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRetry_Do(t *testing.T) {
|
||||
counter := 0
|
||||
tests := []struct {
|
||||
name string
|
||||
backoffRetries int
|
||||
backoffFactor float64
|
||||
backoffMinDuration time.Duration
|
||||
retryableFunc retryableFunc
|
||||
ctx context.Context
|
||||
withCancel bool
|
||||
want uint64
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "return bad request",
|
||||
retryableFunc: func() error {
|
||||
return ErrBadRequest
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "empty retries values",
|
||||
retryableFunc: func() error {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 0,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "only one retry test",
|
||||
backoffRetries: 5,
|
||||
backoffFactor: 1.7,
|
||||
backoffMinDuration: time.Millisecond * 10,
|
||||
retryableFunc: func() error {
|
||||
t := time.NewTicker(time.Millisecond * 5)
|
||||
defer t.Stop()
|
||||
for range t.C {
|
||||
counter++
|
||||
if counter%2 == 0 {
|
||||
return fmt.Errorf("got some error")
|
||||
}
|
||||
if counter%3 == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 1,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "all retries failed test",
|
||||
backoffRetries: 5,
|
||||
backoffFactor: 0.1,
|
||||
backoffMinDuration: time.Millisecond * 10,
|
||||
retryableFunc: func() error {
|
||||
t := time.NewTicker(time.Millisecond * 5)
|
||||
defer t.Stop()
|
||||
for range t.C {
|
||||
return fmt.Errorf("got some error")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
ctx: context.Background(),
|
||||
want: 5,
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r := New()
|
||||
got, err := r.Retry(tt.ctx, tt.retryableFunc)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("Retry() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Errorf("Retry() got = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -353,7 +353,7 @@ var (
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeStepInterval,
|
||||
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s'.", vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour),
|
||||
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s','%s'.", vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcAddr,
|
||||
@@ -410,19 +410,20 @@ var (
|
||||
)
|
||||
|
||||
const (
|
||||
remoteRead = "remote-read"
|
||||
remoteReadUseStream = "remote-read-use-stream"
|
||||
remoteReadConcurrency = "remote-read-concurrency"
|
||||
remoteReadFilterTimeStart = "remote-read-filter-time-start"
|
||||
remoteReadFilterTimeEnd = "remote-read-filter-time-end"
|
||||
remoteReadFilterLabel = "remote-read-filter-label"
|
||||
remoteReadFilterLabelValue = "remote-read-filter-label-value"
|
||||
remoteReadStepInterval = "remote-read-step-interval"
|
||||
remoteReadSrcAddr = "remote-read-src-addr"
|
||||
remoteReadUser = "remote-read-user"
|
||||
remoteReadPassword = "remote-read-password"
|
||||
remoteReadHTTPTimeout = "remote-read-http-timeout"
|
||||
remoteReadHeaders = "remote-read-headers"
|
||||
remoteRead = "remote-read"
|
||||
remoteReadUseStream = "remote-read-use-stream"
|
||||
remoteReadConcurrency = "remote-read-concurrency"
|
||||
remoteReadFilterTimeStart = "remote-read-filter-time-start"
|
||||
remoteReadFilterTimeEnd = "remote-read-filter-time-end"
|
||||
remoteReadFilterLabel = "remote-read-filter-label"
|
||||
remoteReadFilterLabelValue = "remote-read-filter-label-value"
|
||||
remoteReadStepInterval = "remote-read-step-interval"
|
||||
remoteReadSrcAddr = "remote-read-src-addr"
|
||||
remoteReadUser = "remote-read-user"
|
||||
remoteReadPassword = "remote-read-password"
|
||||
remoteReadHTTPTimeout = "remote-read-http-timeout"
|
||||
remoteReadHeaders = "remote-read-headers"
|
||||
remoteReadInsecureSkipVerify = "remote-read-insecure-skip-verify"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -493,6 +494,11 @@ var (
|
||||
"For example, --remote-read-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding remote source storage. \n" +
|
||||
"Multiple headers must be delimited by '^^': --remote-read-headers='header1:value1^^header2:value2'",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: remoteReadInsecureSkipVerify,
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to the remote read address",
|
||||
Value: false,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native/stream"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -65,7 +65,7 @@ func main() {
|
||||
// disable progress bars since openTSDB implementation
|
||||
// does not use progress bar pool
|
||||
vmCfg.DisableProgressBar = true
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
importer, err := vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func main() {
|
||||
}
|
||||
|
||||
vmCfg := initConfigVM(c)
|
||||
importer, err = vm.NewImporter(vmCfg)
|
||||
importer, err = vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
@@ -121,14 +121,15 @@ func main() {
|
||||
Flags: mergeFlags(globalFlags, remoteReadFlags, vmFlags),
|
||||
Action: func(c *cli.Context) error {
|
||||
rr, err := remoteread.NewClient(remoteread.Config{
|
||||
Addr: c.String(remoteReadSrcAddr),
|
||||
Username: c.String(remoteReadUser),
|
||||
Password: c.String(remoteReadPassword),
|
||||
Timeout: c.Duration(remoteReadHTTPTimeout),
|
||||
UseStream: c.Bool(remoteReadUseStream),
|
||||
Headers: c.String(remoteReadHeaders),
|
||||
LabelName: c.String(remoteReadFilterLabel),
|
||||
LabelValue: c.String(remoteReadFilterLabelValue),
|
||||
Addr: c.String(remoteReadSrcAddr),
|
||||
Username: c.String(remoteReadUser),
|
||||
Password: c.String(remoteReadPassword),
|
||||
Timeout: c.Duration(remoteReadHTTPTimeout),
|
||||
UseStream: c.Bool(remoteReadUseStream),
|
||||
Headers: c.String(remoteReadHeaders),
|
||||
LabelName: c.String(remoteReadFilterLabel),
|
||||
LabelValue: c.String(remoteReadFilterLabelValue),
|
||||
InsecureSkipVerify: c.Bool(remoteReadInsecureSkipVerify),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("error create remote read client: %s", err)
|
||||
@@ -136,7 +137,7 @@ func main() {
|
||||
|
||||
vmCfg := initConfigVM(c)
|
||||
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
importer, err := vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
@@ -162,7 +163,7 @@ func main() {
|
||||
fmt.Println("Prometheus import mode")
|
||||
|
||||
vmCfg := initConfigVM(c)
|
||||
importer, err = vm.NewImporter(vmCfg)
|
||||
importer, err = vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
@@ -246,7 +247,7 @@ func main() {
|
||||
return cli.Exit(fmt.Errorf("cannot open exported block at path=%q err=%w", blockPath, err), 1)
|
||||
}
|
||||
var blocksCount uint64
|
||||
if err := parser.ParseStream(f, isBlockGzipped, func(block *parser.Block) error {
|
||||
if err := stream.Parse(f, isBlockGzipped, func(block *stream.Block) error {
|
||||
atomic.AddUint64(&blocksCount, 1)
|
||||
return nil
|
||||
}); err != nil {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -102,6 +102,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read block: %s", err)
|
||||
}
|
||||
var it chunkenc.Iterator
|
||||
for ss.Next() {
|
||||
var name string
|
||||
var labels []vm.LabelPair
|
||||
@@ -123,7 +124,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
|
||||
|
||||
var timestamps []int64
|
||||
var values []float64
|
||||
it := series.Iterator()
|
||||
it = series.Iterator(it)
|
||||
for {
|
||||
typ := it.Next()
|
||||
if typ == chunkenc.ValNone {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -58,7 +59,7 @@ func Test_prometheusProcessor_run(t *testing.T) {
|
||||
return client
|
||||
},
|
||||
im: func(vmCfg vm.Config) *vm.Importer {
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
importer, err := vm.NewImporter(context.Background(), vmCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("error init importer: %s", err)
|
||||
}
|
||||
@@ -95,7 +96,7 @@ func Test_prometheusProcessor_run(t *testing.T) {
|
||||
return client
|
||||
},
|
||||
im: func(vmCfg vm.Config) *vm.Importer {
|
||||
importer, err := vm.NewImporter(vmCfg)
|
||||
importer, err := vm.NewImporter(context.Background(), vmCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("error init importer: %s", err)
|
||||
}
|
||||
|
||||
319
app/vmctl/remote_read_test.go
Normal file
319
app/vmctl/remote_read_test.go
Normal file
@@ -0,0 +1,319 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/testdata/servers_integration_test"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
)
|
||||
|
||||
func TestRemoteRead(t *testing.T) {
|
||||
|
||||
var testCases = []struct {
|
||||
name string
|
||||
remoteReadConfig remoteread.Config
|
||||
vmCfg vm.Config
|
||||
start string
|
||||
end string
|
||||
numOfSamples int64
|
||||
numOfSeries int64
|
||||
rrp remoteReadProcessor
|
||||
chunk string
|
||||
remoteReadSeries func(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries
|
||||
expectedSeries []vm.TimeSeries
|
||||
}{
|
||||
{
|
||||
name: "step minute on minute time range",
|
||||
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*"},
|
||||
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
|
||||
start: "2022-11-26T11:23:05+02:00",
|
||||
end: "2022-11-26T11:24:05+02:00",
|
||||
numOfSamples: 2,
|
||||
numOfSeries: 3,
|
||||
chunk: stepper.StepMinute,
|
||||
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
|
||||
expectedSeries: []vm.TimeSeries{
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{0, 0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{100, 100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{200, 200},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "step month on month time range",
|
||||
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*"},
|
||||
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
|
||||
start: "2022-09-26T11:23:05+02:00",
|
||||
end: "2022-11-26T11:24:05+02:00",
|
||||
numOfSamples: 2,
|
||||
numOfSeries: 3,
|
||||
chunk: stepper.StepMonth,
|
||||
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
|
||||
expectedSeries: []vm.TimeSeries{
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{200},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{200}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range testCases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
remoteReadServer := remote_read_integration.NewRemoteReadServer(t)
|
||||
defer remoteReadServer.Close()
|
||||
remoteWriteServer := remote_read_integration.NewRemoteWriteServer(t)
|
||||
defer remoteWriteServer.Close()
|
||||
|
||||
tt.remoteReadConfig.Addr = remoteReadServer.URL()
|
||||
|
||||
rr, err := remoteread.NewClient(tt.remoteReadConfig)
|
||||
if err != nil {
|
||||
t.Fatalf("error create remote read client: %s", err)
|
||||
}
|
||||
|
||||
start, err := time.Parse(time.RFC3339, tt.start)
|
||||
if err != nil {
|
||||
t.Fatalf("Error parse start time: %s", err)
|
||||
}
|
||||
|
||||
end, err := time.Parse(time.RFC3339, tt.end)
|
||||
if err != nil {
|
||||
t.Fatalf("Error parse end time: %s", err)
|
||||
}
|
||||
|
||||
rrs := tt.remoteReadSeries(start.Unix(), end.Unix(), tt.numOfSeries, tt.numOfSamples)
|
||||
|
||||
remoteReadServer.SetRemoteReadSeries(rrs)
|
||||
remoteWriteServer.ExpectedSeries(tt.expectedSeries)
|
||||
|
||||
tt.vmCfg.Addr = remoteWriteServer.URL()
|
||||
|
||||
importer, err := vm.NewImporter(ctx, tt.vmCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create VM importer: %s", err)
|
||||
}
|
||||
defer importer.Close()
|
||||
|
||||
rmp := remoteReadProcessor{
|
||||
src: rr,
|
||||
dst: importer,
|
||||
filter: remoteReadFilter{
|
||||
timeStart: &start,
|
||||
timeEnd: &end,
|
||||
chunk: tt.chunk,
|
||||
},
|
||||
cc: 1,
|
||||
}
|
||||
|
||||
err = rmp.run(ctx, true, false)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to run remote read processor: %s", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSteamRemoteRead(t *testing.T) {
|
||||
|
||||
var testCases = []struct {
|
||||
name string
|
||||
remoteReadConfig remoteread.Config
|
||||
vmCfg vm.Config
|
||||
start string
|
||||
end string
|
||||
numOfSamples int64
|
||||
numOfSeries int64
|
||||
rrp remoteReadProcessor
|
||||
chunk string
|
||||
remoteReadSeries func(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries
|
||||
expectedSeries []vm.TimeSeries
|
||||
}{
|
||||
{
|
||||
name: "step minute on minute time range",
|
||||
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*", UseStream: true},
|
||||
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
|
||||
start: "2022-11-26T11:23:05+02:00",
|
||||
end: "2022-11-26T11:24:05+02:00",
|
||||
numOfSamples: 2,
|
||||
numOfSeries: 3,
|
||||
chunk: stepper.StepMinute,
|
||||
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
|
||||
expectedSeries: []vm.TimeSeries{
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{0, 0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{100, 100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1669454585000, 1669454615000},
|
||||
Values: []float64{200, 200},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "step month on month time range",
|
||||
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*", UseStream: true},
|
||||
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
|
||||
start: "2022-09-26T11:23:05+02:00",
|
||||
end: "2022-11-26T11:24:05+02:00",
|
||||
numOfSamples: 2,
|
||||
numOfSeries: 3,
|
||||
chunk: stepper.StepMonth,
|
||||
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
|
||||
expectedSeries: []vm.TimeSeries{
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1664184185000},
|
||||
Values: []float64{200},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{0},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{100},
|
||||
},
|
||||
{
|
||||
Name: "vm_metric_1",
|
||||
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
|
||||
Timestamps: []int64{1666819415000},
|
||||
Values: []float64{200}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range testCases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
remoteReadServer := remote_read_integration.NewRemoteReadStreamServer(t)
|
||||
defer remoteReadServer.Close()
|
||||
remoteWriteServer := remote_read_integration.NewRemoteWriteServer(t)
|
||||
defer remoteWriteServer.Close()
|
||||
|
||||
tt.remoteReadConfig.Addr = remoteReadServer.URL()
|
||||
|
||||
rr, err := remoteread.NewClient(tt.remoteReadConfig)
|
||||
if err != nil {
|
||||
t.Fatalf("error create remote read client: %s", err)
|
||||
}
|
||||
|
||||
start, err := time.Parse(time.RFC3339, tt.start)
|
||||
if err != nil {
|
||||
t.Fatalf("Error parse start time: %s", err)
|
||||
}
|
||||
|
||||
end, err := time.Parse(time.RFC3339, tt.end)
|
||||
if err != nil {
|
||||
t.Fatalf("Error parse end time: %s", err)
|
||||
}
|
||||
|
||||
rrs := tt.remoteReadSeries(start.Unix(), end.Unix(), tt.numOfSeries, tt.numOfSamples)
|
||||
|
||||
remoteReadServer.InitMockStorage(rrs)
|
||||
remoteWriteServer.ExpectedSeries(tt.expectedSeries)
|
||||
|
||||
tt.vmCfg.Addr = remoteWriteServer.URL()
|
||||
|
||||
importer, err := vm.NewImporter(ctx, tt.vmCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create VM importer: %s", err)
|
||||
}
|
||||
defer importer.Close()
|
||||
|
||||
rmp := remoteReadProcessor{
|
||||
src: rr,
|
||||
dst: importer,
|
||||
filter: remoteReadFilter{
|
||||
timeStart: &start,
|
||||
timeEnd: &end,
|
||||
chunk: tt.chunk,
|
||||
},
|
||||
cc: 1,
|
||||
}
|
||||
|
||||
err = rmp.run(ctx, true, false)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to run remote read processor: %s", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
@@ -60,6 +61,8 @@ type Config struct {
|
||||
// LabelName, LabelValue stands for label=~value pair used for read requests.
|
||||
// Is optional.
|
||||
LabelName, LabelValue string
|
||||
// InsecureSkipVerify defines whether to skip TLS certificate verification when connecting to the remote read address.
|
||||
InsecureSkipVerify bool
|
||||
}
|
||||
|
||||
// Filter defines a list of filters applied to requested data
|
||||
@@ -100,7 +103,7 @@ func NewClient(cfg Config) (*Client, error) {
|
||||
c := &Client{
|
||||
c: &http.Client{
|
||||
Timeout: cfg.Timeout,
|
||||
Transport: http.DefaultTransport.(*http.Transport).Clone(),
|
||||
Transport: utils.Transport(cfg.Addr, cfg.InsecureSkipVerify),
|
||||
},
|
||||
addr: strings.TrimSuffix(cfg.Addr, "/"),
|
||||
user: cfg.Username,
|
||||
@@ -154,7 +157,7 @@ func (c *Client) do(req *http.Request) (*http.Response, error) {
|
||||
// Ping checks the health of the read source
|
||||
func (c *Client) Ping() error {
|
||||
url := c.addr + healthPath
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create request to %q: %s", url, err)
|
||||
}
|
||||
@@ -171,7 +174,7 @@ func (c *Client) Ping() error {
|
||||
func (c *Client) fetch(ctx context.Context, data []byte, streamCb StreamCallback) error {
|
||||
r := bytes.NewReader(data)
|
||||
url := c.addr + remoteReadPath
|
||||
req, err := http.NewRequest("POST", url, r)
|
||||
req, err := http.NewRequest(http.MethodPost, url, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
|
||||
368
app/vmctl/testdata/servers_integration_test/remote_read_server.go
vendored
Normal file
368
app/vmctl/testdata/servers_integration_test/remote_read_server.go
vendored
Normal file
@@ -0,0 +1,368 @@
|
||||
package remote_read_integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/storage/remote"
|
||||
"github.com/prometheus/prometheus/tsdb/chunks"
|
||||
)
|
||||
|
||||
const (
|
||||
maxBytesInFrame = 1024 * 1024
|
||||
)
|
||||
|
||||
type RemoteReadServer struct {
|
||||
server *httptest.Server
|
||||
series []*prompb.TimeSeries
|
||||
storage *MockStorage
|
||||
}
|
||||
|
||||
// NewRemoteReadServer creates a remote read server. It exposes a single endpoint and responds with the
|
||||
// passed series based on the request to the read endpoint. It returns a server which should be closed after
|
||||
// being used.
|
||||
func NewRemoteReadServer(t *testing.T) *RemoteReadServer {
|
||||
rrs := &RemoteReadServer{
|
||||
series: make([]*prompb.TimeSeries, 0),
|
||||
}
|
||||
rrs.server = httptest.NewServer(rrs.getReadHandler(t))
|
||||
return rrs
|
||||
}
|
||||
|
||||
// Close closes the server.
|
||||
func (rrs *RemoteReadServer) Close() {
|
||||
rrs.server.Close()
|
||||
}
|
||||
|
||||
func (rrs *RemoteReadServer) URL() string {
|
||||
return rrs.server.URL
|
||||
}
|
||||
|
||||
func (rrs *RemoteReadServer) SetRemoteReadSeries(series []*prompb.TimeSeries) {
|
||||
rrs.series = append(rrs.series, series...)
|
||||
}
|
||||
|
||||
func (rrs *RemoteReadServer) getReadHandler(t *testing.T) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if !validateReadHeaders(t, r) {
|
||||
t.Fatalf("invalid read headers")
|
||||
}
|
||||
|
||||
compressed, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("error read body: %s", err)
|
||||
}
|
||||
|
||||
reqBuf, err := snappy.Decode(nil, compressed)
|
||||
if err != nil {
|
||||
t.Fatalf("error decode compressed data:%s", err)
|
||||
}
|
||||
|
||||
var req prompb.ReadRequest
|
||||
if err := proto.Unmarshal(reqBuf, &req); err != nil {
|
||||
t.Fatalf("error unmarshal read request: %s", err)
|
||||
}
|
||||
|
||||
resp := &prompb.ReadResponse{
|
||||
Results: make([]*prompb.QueryResult, len(req.Queries)),
|
||||
}
|
||||
|
||||
for i, r := range req.Queries {
|
||||
startTs := r.StartTimestampMs
|
||||
endTs := r.EndTimestampMs
|
||||
ts := make([]*prompb.TimeSeries, len(rrs.series))
|
||||
for i, s := range rrs.series {
|
||||
var samples []prompb.Sample
|
||||
for _, sample := range s.Samples {
|
||||
if sample.Timestamp >= startTs && sample.Timestamp < endTs {
|
||||
samples = append(samples, sample)
|
||||
}
|
||||
}
|
||||
var series prompb.TimeSeries
|
||||
if len(samples) > 0 {
|
||||
series.Labels = s.Labels
|
||||
series.Samples = samples
|
||||
}
|
||||
ts[i] = &series
|
||||
}
|
||||
|
||||
resp.Results[i] = &prompb.QueryResult{Timeseries: ts}
|
||||
data, err := proto.Marshal(resp)
|
||||
if err != nil {
|
||||
t.Fatalf("error marshal response: %s", err)
|
||||
}
|
||||
|
||||
compressed = snappy.Encode(nil, data)
|
||||
|
||||
w.Header().Set("Content-Type", "application/x-protobuf")
|
||||
w.Header().Set("Content-Encoding", "snappy")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
if _, err := w.Write(compressed); err != nil {
|
||||
t.Fatalf("snappy encode error: %s", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func NewRemoteReadStreamServer(t *testing.T) *RemoteReadServer {
|
||||
rrs := &RemoteReadServer{
|
||||
series: make([]*prompb.TimeSeries, 0),
|
||||
}
|
||||
rrs.server = httptest.NewServer(rrs.getStreamReadHandler(t))
|
||||
return rrs
|
||||
}
|
||||
|
||||
func (rrs *RemoteReadServer) InitMockStorage(series []*prompb.TimeSeries) {
|
||||
rrs.storage = NewMockStorage(series)
|
||||
}
|
||||
|
||||
func (rrs *RemoteReadServer) getStreamReadHandler(t *testing.T) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if !validateStreamReadHeaders(t, r) {
|
||||
t.Fatalf("invalid read headers")
|
||||
}
|
||||
|
||||
f, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
t.Fatalf("internal http.ResponseWriter does not implement http.Flusher interface")
|
||||
}
|
||||
|
||||
stream := remote.NewChunkedWriter(w, f)
|
||||
|
||||
data, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("error read body: %s", err)
|
||||
}
|
||||
|
||||
decodedData, err := snappy.Decode(nil, data)
|
||||
if err != nil {
|
||||
t.Fatalf("error decode compressed data:%s", err)
|
||||
}
|
||||
|
||||
var req prompb.ReadRequest
|
||||
if err := proto.Unmarshal(decodedData, &req); err != nil {
|
||||
t.Fatalf("error unmarshal read request: %s", err)
|
||||
}
|
||||
|
||||
var chks []prompb.Chunk
|
||||
ctx := context.Background()
|
||||
for idx, r := range req.Queries {
|
||||
startTs := r.StartTimestampMs
|
||||
endTs := r.EndTimestampMs
|
||||
|
||||
var matchers []*labels.Matcher
|
||||
cb := func() (int64, error) { return 0, nil }
|
||||
|
||||
c := remote.NewSampleAndChunkQueryableClient(rrs.storage, nil, matchers, true, cb)
|
||||
|
||||
q, err := c.ChunkQuerier(ctx, startTs, endTs)
|
||||
if err != nil {
|
||||
t.Fatalf("error init chunk querier: %s", err)
|
||||
}
|
||||
|
||||
ss := q.Select(false, nil, matchers...)
|
||||
var iter chunks.Iterator
|
||||
for ss.Next() {
|
||||
series := ss.At()
|
||||
iter = series.Iterator(iter)
|
||||
labels := remote.MergeLabels(labelsToLabelsProto(series.Labels()), nil)
|
||||
|
||||
frameBytesLeft := maxBytesInFrame
|
||||
for _, lb := range labels {
|
||||
frameBytesLeft -= lb.Size()
|
||||
}
|
||||
|
||||
isNext := iter.Next()
|
||||
|
||||
for isNext {
|
||||
chunk := iter.At()
|
||||
|
||||
if chunk.Chunk == nil {
|
||||
t.Fatalf("error found not populated chunk returned by SeriesSet at ref: %v", chunk.Ref)
|
||||
}
|
||||
|
||||
chks = append(chks, prompb.Chunk{
|
||||
MinTimeMs: chunk.MinTime,
|
||||
MaxTimeMs: chunk.MaxTime,
|
||||
Type: prompb.Chunk_Encoding(chunk.Chunk.Encoding()),
|
||||
Data: chunk.Chunk.Bytes(),
|
||||
})
|
||||
|
||||
frameBytesLeft -= chks[len(chks)-1].Size()
|
||||
|
||||
// We are fine with minor inaccuracy of max bytes per frame. The inaccuracy will be max of full chunk size.
|
||||
isNext = iter.Next()
|
||||
if frameBytesLeft > 0 && isNext {
|
||||
continue
|
||||
}
|
||||
|
||||
resp := &prompb.ChunkedReadResponse{
|
||||
ChunkedSeries: []*prompb.ChunkedSeries{
|
||||
{Labels: labels, Chunks: chks},
|
||||
},
|
||||
QueryIndex: int64(idx),
|
||||
}
|
||||
|
||||
b, err := proto.Marshal(resp)
|
||||
if err != nil {
|
||||
t.Fatalf("error marshal response: %s", err)
|
||||
}
|
||||
|
||||
if _, err := stream.Write(b); err != nil {
|
||||
t.Fatalf("error write to stream: %s", err)
|
||||
}
|
||||
chks = chks[:0]
|
||||
rrs.storage.Reset()
|
||||
}
|
||||
if err := iter.Err(); err != nil {
|
||||
t.Fatalf("error iterate over chunk series: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// validateReadHeaders reports whether r looks like a non-streamed remote read
// request: POST method, "snappy" content encoding, "application/x-protobuf"
// content type and a 0.1.x X-Prometheus-Remote-Read-Version header.
// Every violation is recorded on t with t.Errorf.
func validateReadHeaders(t *testing.T, r *http.Request) bool {
	return validateRemoteReadHeaders(t, r, "application/x-protobuf")
}

// validateStreamReadHeaders reports whether r looks like a streamed remote
// read request. It performs the same checks as validateReadHeaders but expects
// the "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
// content type.
func validateStreamReadHeaders(t *testing.T, r *http.Request) bool {
	return validateRemoteReadHeaders(t, r, "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse")
}

// validateRemoteReadHeaders holds the checks shared by both validators.
//
// It uses t.Errorf rather than t.Fatalf because it is called from HTTP handler
// goroutines, where FailNow must not be used. It returns false when at least
// one check failed so that callers can abort the request themselves.
func validateRemoteReadHeaders(t *testing.T, r *http.Request, wantContentType string) bool {
	t.Helper()

	valid := true
	if r.Method != http.MethodPost {
		t.Errorf("got %q method, expected %q", r.Method, http.MethodPost)
		valid = false
	}
	if got := r.Header.Get("Content-Encoding"); got != "snappy" {
		t.Errorf("got %q content encoding header, expected %q", got, "snappy")
		valid = false
	}
	if got := r.Header.Get("Content-Type"); got != wantContentType {
		t.Errorf("got %q content type header, expected %q", got, wantContentType)
		valid = false
	}

	remoteReadVersion := r.Header.Get("X-Prometheus-Remote-Read-Version")
	switch {
	case remoteReadVersion == "":
		t.Errorf("got empty prometheus remote read header")
		valid = false
	case !strings.HasPrefix(remoteReadVersion, "0.1."):
		t.Errorf("wrong remote version defined")
		valid = false
	}

	return valid
}
|
||||
|
||||
func GenerateRemoteReadSeries(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries {
|
||||
var ts []*prompb.TimeSeries
|
||||
j := 0
|
||||
for i := 0; i < int(numOfSeries); i++ {
|
||||
if i%3 == 0 {
|
||||
j++
|
||||
}
|
||||
|
||||
timeSeries := prompb.TimeSeries{
|
||||
Labels: []prompb.Label{
|
||||
{Name: labels.MetricName, Value: fmt.Sprintf("vm_metric_%d", j)},
|
||||
{Name: "job", Value: strconv.Itoa(i)},
|
||||
},
|
||||
}
|
||||
|
||||
ts = append(ts, &timeSeries)
|
||||
}
|
||||
|
||||
for i := range ts {
|
||||
ts[i].Samples = generateRemoteReadSamples(i, start, end, numOfSamples)
|
||||
}
|
||||
|
||||
return ts
|
||||
}
|
||||
|
||||
func generateRemoteReadSamples(idx int, startTime, endTime, numOfSamples int64) []prompb.Sample {
|
||||
samples := make([]prompb.Sample, 0)
|
||||
delta := (endTime - startTime) / numOfSamples
|
||||
|
||||
t := startTime
|
||||
for t != endTime {
|
||||
v := 100 * int64(idx)
|
||||
samples = append(samples, prompb.Sample{
|
||||
Timestamp: t * 1000,
|
||||
Value: float64(v),
|
||||
})
|
||||
t = t + delta
|
||||
}
|
||||
|
||||
return samples
|
||||
}
|
||||
|
||||
type MockStorage struct {
|
||||
query *prompb.Query
|
||||
store []*prompb.TimeSeries
|
||||
}
|
||||
|
||||
func NewMockStorage(series []*prompb.TimeSeries) *MockStorage {
|
||||
return &MockStorage{store: series}
|
||||
}
|
||||
|
||||
func (ms *MockStorage) Read(_ context.Context, query *prompb.Query) (*prompb.QueryResult, error) {
|
||||
if ms.query != nil {
|
||||
return nil, fmt.Errorf("expected only one call to remote client got: %v", query)
|
||||
}
|
||||
ms.query = query
|
||||
|
||||
q := &prompb.QueryResult{Timeseries: make([]*prompb.TimeSeries, 0, len(ms.store))}
|
||||
for _, s := range ms.store {
|
||||
var samples []prompb.Sample
|
||||
for _, sample := range s.Samples {
|
||||
if sample.Timestamp >= query.StartTimestampMs && sample.Timestamp < query.EndTimestampMs {
|
||||
samples = append(samples, sample)
|
||||
}
|
||||
}
|
||||
var series prompb.TimeSeries
|
||||
if len(samples) > 0 {
|
||||
series.Labels = s.Labels
|
||||
series.Samples = samples
|
||||
}
|
||||
|
||||
q.Timeseries = append(q.Timeseries, &series)
|
||||
}
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func (ms *MockStorage) Reset() {
|
||||
ms.query = nil
|
||||
}
|
||||
|
||||
func labelsToLabelsProto(labels labels.Labels) []prompb.Label {
|
||||
result := make([]prompb.Label, 0, len(labels))
|
||||
for _, l := range labels {
|
||||
result = append(result, prompb.Label{
|
||||
Name: l.Name,
|
||||
Value: l.Value,
|
||||
})
|
||||
}
|
||||
return result
|
||||
}
|
||||
86
app/vmctl/testdata/servers_integration_test/remote_write_server.go
vendored
Normal file
86
app/vmctl/testdata/servers_integration_test/remote_write_server.go
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
package remote_read_integration
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
)
|
||||
|
||||
type RemoteWriteServer struct {
|
||||
server *httptest.Server
|
||||
series []vm.TimeSeries
|
||||
}
|
||||
|
||||
// NewRemoteWriteServer prepares test remote write server
|
||||
func NewRemoteWriteServer(t *testing.T) *RemoteWriteServer {
|
||||
rws := &RemoteWriteServer{series: make([]vm.TimeSeries, 0)}
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle("/api/v1/import", rws.getWriteHandler(t))
|
||||
mux.Handle("/health", rws.handlePing())
|
||||
rws.server = httptest.NewServer(mux)
|
||||
return rws
|
||||
}
|
||||
|
||||
// Close closes the server.
|
||||
func (rws *RemoteWriteServer) Close() {
|
||||
rws.server.Close()
|
||||
}
|
||||
|
||||
func (rws *RemoteWriteServer) ExpectedSeries(series []vm.TimeSeries) {
|
||||
rws.series = append(rws.series, series...)
|
||||
}
|
||||
|
||||
func (rws *RemoteWriteServer) URL() string {
|
||||
return rws.server.URL
|
||||
}
|
||||
|
||||
func (rws *RemoteWriteServer) getWriteHandler(t *testing.T) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
var tss []vm.TimeSeries
|
||||
scanner := bufio.NewScanner(r.Body)
|
||||
var rows parser.Rows
|
||||
for scanner.Scan() {
|
||||
|
||||
rows.Unmarshal(scanner.Text())
|
||||
for _, row := range rows.Rows {
|
||||
var labelPairs []vm.LabelPair
|
||||
var ts vm.TimeSeries
|
||||
nameValue := ""
|
||||
for _, tag := range row.Tags {
|
||||
if string(tag.Key) == "__name__" {
|
||||
nameValue = string(tag.Value)
|
||||
continue
|
||||
}
|
||||
labelPairs = append(labelPairs, vm.LabelPair{Name: string(tag.Key), Value: string(tag.Value)})
|
||||
}
|
||||
|
||||
ts.Values = append(ts.Values, row.Values...)
|
||||
ts.Timestamps = append(ts.Timestamps, row.Timestamps...)
|
||||
ts.Name = nameValue
|
||||
ts.LabelPairs = labelPairs
|
||||
tss = append(tss, ts)
|
||||
}
|
||||
rows.Reset()
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tss, rws.series) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
t.Fatalf("datasets not equal, expected: %#v; \n got: %#v", rws.series, tss)
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
})
|
||||
}
|
||||
|
||||
func (rws *RemoteWriteServer) handlePing() http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("OK"))
|
||||
})
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user