Compare commits

...

378 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
69e655ba7f docs/CHANGELOG.md: cut v1.69.0 2021-11-08 15:47:36 +02:00
Yury Molodov
b78ab88a1c vmui: migration MUI Core v4 to v5 (#1795)
* migration MUI Core v4 to v5

* app/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-11-08 15:45:59 +02:00
Aliaksandr Valialkin
fd596945e7 lib/promscrape: improve logging for scrape_config_files parse errors
Log the actual file path, which led to the parse error.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1789
2021-11-08 13:34:12 +02:00
Aliaksandr Valialkin
3419ac1d36 app/vmselect/promql: add duration_over_time(m[d], max_interval) function
This function calculates the actual lifetime of the time series on the given lookbehind window `d`

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1780
2021-11-08 13:14:09 +02:00
Aliaksandr Valialkin
1be4838ca0 vendor: make vendor-update 2021-11-08 12:39:57 +02:00
Aliaksandr Valialkin
e44137d46b docs/MetricsQL.md: clarify documentation for lifetime function
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1780
2021-11-08 12:35:17 +02:00
Aliaksandr Valialkin
b07010839c Makefile: add TAG=v... make publish-release rule for building and publishing a release for the given TAG 2021-11-08 12:29:10 +02:00
Aliaksandr Valialkin
5edf695bc9 docs/CHANGELOG.md: document b9cdbcb5046315db96e1e7ca9923d09d0f30dc25 2021-11-08 12:11:30 +02:00
Yury Molodov
6d1d558c4f vmui: fix graph reset (#1788)
* feat: add query history

* fix: change detect keyUp for nav query history

* feat: set default query history

* feat: change graph legend

* update dependencies

* update codemirror version

* fix: correct update period time after zoom/pan

* fix: optimize data processing for the graph

* fix: eliminate memory leaks related to mouse events

* fix: correct display of straight line

* Merge branch 'master' into vmui-fix-reset-graph

* app/vmselect: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-11-05 20:56:57 +02:00
Aliaksandr Valialkin
34b5414ba8 app/{vmalert,vmbackup}/README.md: sync with docs after the commit 47d1612bf8 2021-11-05 20:45:38 +02:00
João Paulo
47d1612bf8 docs: fix multiple typos (#1787)
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-11-05 20:44:02 +02:00
Aliaksandr Valialkin
237885e0d2 docs/vmalert.md: document the addition of -defaultTenant.prometheus and -defaultTenant.graphite command-line options to enterprise version of vmalert 2021-11-05 20:04:09 +02:00
Aliaksandr Valialkin
24dce03aaa app/vmalert/datasource: use plain string literals instead of constants
This removes the unneeded level of indirection and improves code readability.

The "prometheus" and "graphite" constants aren't going to change in the future, so there is no sense in hiding them behind constants.
2021-11-05 19:57:47 +02:00
Aliaksandr Valialkin
bf814320b0 app/vmalert: remove rule.type config, since it doesn't play well with the upcoming default tenants for -clusterMode
It is better from the consistency point of view to set up rule types at group level where tenant config is set up.
2021-11-05 19:52:32 +02:00
Aliaksandr Valialkin
c43bcdb5fb app/vmagent: allow bigger number of in-memory blocks for big values of -remoteWrite.queues
This should improve the maximum data ingestion speed for highly-loaded vmagent instances
which run on beefy servers with many CPU cores and big amounts of RAM
2021-11-05 15:16:05 +02:00
Aliaksandr Valialkin
cbfc7b7c92 app/{vminsert,vmagent}: hide passwords and auth tokens by default at /config page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1764
2021-11-05 14:41:16 +02:00
Aliaksandr Valialkin
e73a82f7a5 lib/promauth: do not show empty values in oauth2 config section at /config page 2021-11-05 12:53:39 +02:00
Aliaksandr Valialkin
3db1f2d550 deployment/dm: update Go builder from Go1.17.2 to Go1.17.3
See https://github.com/golang/go/issues?q=milestone%3AGo1.17.3+label%3ACherryPickApproved
2021-11-05 11:51:38 +02:00
Denys Holius
cd966bf552 bumped grafana dashboards revisions for guides (#1784) 2021-11-05 11:43:26 +02:00
Aliaksandr Valialkin
faa0eb6b52 docs/FAQ.md: mention that VictoriaMetrics can be queried via Graphite API 2021-11-04 22:37:56 +02:00
Aliaksandr Valialkin
4839d07f34 app/vmagent/remotewrite: fix parallel data sending to remote storage systems at e0d2ba5608 2021-11-04 16:58:28 +02:00
Aliaksandr Valialkin
a69264e885 app/vmagent: add -remoteWrite.maxRowsPerBlock command-line option, which may be used for improving data ingestion performance under high load 2021-11-04 15:39:14 +02:00
Aliaksandr Valialkin
e0d2ba5608 app/vmagent/remotewrite: send data to remote storage systems in parallel
This should improve data ingestion speed when many `-remoteWrite.url` command-line flags are configured
2021-11-04 15:04:16 +02:00
dependabot[bot]
558f77c259 build(deps-dev): bump @typescript-eslint/eslint-plugin (#183)
Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 5.2.0 to 5.3.0.
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/master/packages/eslint-plugin/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v5.3.0/packages/eslint-plugin)

---
updated-dependencies:
- dependency-name: "@typescript-eslint/eslint-plugin"
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-04 12:53:20 +02:00
Aliaksandr Valialkin
2178335618 app/vmselect: make vmui-update 2021-11-04 12:13:12 +02:00
dependabot[bot]
ebaa4e7256 build(deps-dev): bump @babel/plugin-proposal-nullish-coalescing-operator (#1769)
Bumps [@babel/plugin-proposal-nullish-coalescing-operator](https://github.com/babel/babel/tree/HEAD/packages/babel-plugin-proposal-nullish-coalescing-operator) from 7.14.5 to 7.16.0.
- [Release notes](https://github.com/babel/babel/releases)
- [Changelog](https://github.com/babel/babel/blob/main/CHANGELOG.md)
- [Commits](https://github.com/babel/babel/commits/v7.16.0/packages/babel-plugin-proposal-nullish-coalescing-operator)

---
updated-dependencies:
- dependency-name: "@babel/plugin-proposal-nullish-coalescing-operator"
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-04 12:09:34 +02:00
dependabot[bot]
9ed7ead84f build(deps): bump @date-io/dayjs in /app/vmui/packages/vmui (#1770)
Bumps [@date-io/dayjs](https://github.com/dmtrKovalenko/date-io) from 1.3.13 to 2.11.0.
- [Release notes](https://github.com/dmtrKovalenko/date-io/releases)
- [Commits](https://github.com/dmtrKovalenko/date-io/compare/v1.3.13...v2.11.0)

---
updated-dependencies:
- dependency-name: "@date-io/dayjs"
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-04 12:09:11 +02:00
Denys Holius
06114c7bb2 bumped golangci-lint to the latest 1.43 (#1781) 2021-11-04 11:34:08 +02:00
Roman Khavronenko
1e84339df0 docs: make link to logos zip absolute (#1782)
The relative link won't work for github-docs website,
so we're changing it to absolute link.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-11-04 11:32:49 +02:00
Aliaksandr Valialkin
aa534c2582 lib/promscrape: add -promscrape.maxResponseHeadersSize command-line flag for tuning the maximum http response headers size from Prometheus scrape targets 2021-11-03 22:26:56 +02:00
Aliaksandr Valialkin
27044b84d2 app/vmselect/promql: add limit_offset(limit, offset, q) function, which can be used for paging over big number of time series
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1778
2021-11-03 16:02:27 +02:00
Aliaksandr Valialkin
43a58bd618 app/vmselect/promql: add label_graphite_group() function for extracting groups from Graphite metric names 2021-11-03 13:19:08 +02:00
Aliaksandr Valialkin
da2e0e29a4 docs/CHANGELOG.md: document e3a91b186a 2021-11-02 18:39:14 +02:00
Aliaksandr Valialkin
d1eb87c831 app/{vmagent,vminsert}: add ability to restrict access to /config page with authKey query arg
The authKey can be configured via `-configAuthKey` command-line flag.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1764
2021-11-01 16:44:54 +02:00
Aliaksandr Valialkin
28b6456f3b vendor: make vendor-update 2021-11-01 15:59:38 +02:00
Aliaksandr Valialkin
cb3819d44e vendor: update github.com/VictoriaMetrics/metrics from v1.18.0 to v1.18.1 2021-11-01 15:52:53 +02:00
Aliaksandr Valialkin
701973877f docs/Articles.md: add a link to https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986 2021-10-29 14:02:51 +03:00
Aliaksandr Valialkin
1a16dab9e1 docs/vmauth.md: typo fix 2021-10-28 14:06:00 +03:00
Aliaksandr Valialkin
bb87949d5c lib/protoparser/influx: automatically detect timestamp precision depending on the number of decimal digits in the timestamp 2021-10-28 12:47:22 +03:00
Aliaksandr Valialkin
d0e7c0535e lib/logger: show only explicitly set command-line flags in logs
This reduces initial verbosity in logs
2021-10-28 11:00:52 +03:00
Aliaksandr Valialkin
acfda6d8fd app/vmbackupmanager: fix links to images
This is a follow-up after bd6b8f7e31
2021-10-27 21:35:52 +03:00
Yury Molodov
47ee3744f2 vmui: correct migration material-ui (#1758)
* migration material-ui

* fix: rollback popover

* app/vmselect: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-27 20:41:26 +03:00
Aliaksandr Valialkin
74b8af9891 lib/promscrape: add collapse and expand buttons per each group of targets from the same scrape job 2021-10-27 20:03:24 +03:00
Aliaksandr Valialkin
6608705652 app/{vmalert,vmagent}: improve the distribution of scrape offsets among targets / rules
Previously only the lower part of 64-bit hash was used for calculating the offset.
This may give uneven distribution in some cases. So let's use all the available 64 bits from the hash
for calculating the offset.
2021-10-27 19:59:16 +03:00
Aliaksandr Valialkin
e3a91b186a lib/protoparser/prometheus: optimize GetRowsDiff() function
This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1745 ,
since the provided profile shows that the majority of CPU and memory is spent in this function
during `streamParse` when `-promscrape.noStaleMarkers` wasn't set.
2021-10-27 18:54:45 +03:00
Aliaksandr Valialkin
95d44157fc lib/protoparser/prometheus: add a benchmark for GetRowsDiff 2021-10-27 18:53:54 +03:00
Aliaksandr Valialkin
1952ab99aa all: fix build issues and tests for Apple M1
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1653
2021-10-27 15:06:34 +03:00
Aliaksandr Valialkin
1ae7ca848c .github/workflows/main.yml: checkout code before installing dependencies
Dependencies depend on Makefile rules from the code, so code checkout must run first
2021-10-26 22:08:58 +03:00
Aliaksandr Valialkin
9ec0175e83 docs/CHANGELOG.md: mention the issue about missing proxy_url config option at /config page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1755
2021-10-26 22:06:35 +03:00
Aliaksandr Valialkin
c560a338e8 .github/workflows/main.yml: re-use makefile rules for installing golint, errcheck and golangci-lint 2021-10-26 21:26:39 +03:00
Aliaksandr Valialkin
4821adfd95 lib/promscrape: properly show proxy_url option value at /config page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1755
2021-10-26 21:23:54 +03:00
Aliaksandr Valialkin
51641c0840 vendor: make vendor-update 2021-10-26 19:36:50 +03:00
Yury Molodov
956cf83e7b vmui: update dependencies (#1754)
* update dependencies

* update codemirror version

* app/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-26 19:31:20 +03:00
Aliaksandr Valialkin
88d42c3ac1 app/vmbackup/README.md: sync with docs/vmbackup.md after e706fb5686 2021-10-26 19:20:47 +03:00
Dima Lazerka
e706fb5686 Fix doc: vmbackup splits by 1 GiB not 100 MB (#1756)
This is a follow-up for bdd0a1cdb2
2021-10-26 19:19:49 +03:00
Denys Holius
d282a7593b fixed wrong path for npm dependabot checks (#1744) 2021-10-26 11:04:32 +03:00
Aliaksandr Valialkin
a7e3cbd6ad docs/CHANGELOG.md: document 3dbdf1632e 2021-10-25 12:16:37 +03:00
Roman Khavronenko
3dbdf1632e vmalert: allow groups with empty rules for compatibility reasons (#1742)
Prometheus allows to have groups with no rules, so we should support
it in vmalert as well for compatibility reasons.
It is also allowed to hot-reload empty groups by adding or removing rules.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-25 12:15:02 +03:00
Aliaksandr Valialkin
d5825f13d3 docs/Cluster-VictoriaMetrics.md: add links with the explanation of active time series and series churn rate 2021-10-24 18:40:19 +03:00
Aliaksandr Valialkin
6b6a4ca51d docs/CaseStudies.md: fix a link to AbiosGaming case study 2021-10-24 18:36:58 +03:00
Aliaksandr Valialkin
df8f967040 app/vmselect/promql: reduce the precision from 15 significant digits to 13 significant digits when comparing float64 results in tests
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1738
2021-10-24 13:31:14 +03:00
Aliaksandr Valialkin
8d0fafc377 docs/CHANGELOG.md: typo fix 2021-10-22 21:11:54 +03:00
Aliaksandr Valialkin
f64f626927 go.mod: remove outdated replacement 2021-10-22 19:46:54 +03:00
Aliaksandr Valialkin
7f7cac20c1 docs/CHANGELOG.md: cut v1.68.0 2021-10-22 19:37:48 +03:00
Aliaksandr Valialkin
b76db7c772 deployment/docker: update Grafana from v8.2.0 to v8.2.2 2021-10-22 19:33:22 +03:00
Aliaksandr Valialkin
8124f202a4 vendor: make vendor-update 2021-10-22 19:27:06 +03:00
Aliaksandr Valialkin
a69f1baa13 docs/vmauth.md: make docs-sync 2021-10-22 19:21:34 +03:00
Aliaksandr Valialkin
013d626889 app/vmauth: add ability to specify http headers to send in requests to backends
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1736
2021-10-22 19:10:29 +03:00
Aliaksandr Valialkin
7fa15f7f86 lib/promscrape: do not populate response body to memory in stream parsing mode if -promscrape.noStaleMarkers is set
The response body isn't used if -promscrape.noStaleMarkers is set after the commit 2876137c92 ,
so there is no sense in populating it in memory. This should reduce memory usage when scraping big responses.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1728#issuecomment-949630694
2021-10-22 16:44:44 +03:00
Aliaksandr Valialkin
7e88713ca3 docs/CHANGELOG.md: document 43a7984cd8 2021-10-22 14:00:20 +03:00
Aliaksandr Valialkin
6106d4069d lib/promscrape: do not sort original labels and do not intern label string for the original labels before the sharding code is executed
This should reduce CPU and memory usage in shard mode when service discovery finds big number of scrape targets with many long labels.
See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets

This is a follow-up after 9882cda8b9

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1728
2021-10-22 13:54:30 +03:00
Aliaksandr Valialkin
2876137c92 lib/promscrape: reduce memory usage if -promscrape.noStaleMarkers command-line flag is passed
Do not store in memory the response from the last scrape per each target if -promscrape.noStaleMarkers option is enabled.
This should reduce memory usage when the scraped targets return large responses.
2021-10-22 13:10:29 +03:00
Roman Khavronenko
43a7984cd8 vmalert: correctly calculate alert ID including extra labels (#1734)
Previously, ID for alert entity was generated without alertname or groupname.
This led to collisions when multiple alerting rules within the same group
produced the same labelsets. E.g. expr: `sum(metric1) by (job) > 0` and
expr: `sum(metric2) by (job) > 0` could result in the same labelset `job: "job"`.

The issue affects only UI and Web API parts of vmalert, because alert ID is used
only for displaying and finding active alerts. It does not affect state restore
procedure, since this label was added right before pushing to remote storage.

The change now adds all extra labels right after receiving response from the datasource.
And removes adding extra labels before pushing to remote storage.

Additionally, change introduces a new flag `Restored` which will be displayed in UI
for alerts which have been restored from remote storage on restart.
2021-10-22 12:30:38 +03:00
Aliaksandr Valialkin
8568003bb1 docs/CHANGELOG.md: document a3684fe3de 2021-10-22 12:28:01 +03:00
Nikolay
a3684fe3de adds tab as second separator for graphite text protocol (#1733)
* adds tab as second separator for graphite text protocol

* changes indexFunc for indexAny

* Update lib/protoparser/graphite/parser_test.go

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-22 12:23:45 +03:00
Yury Molodov
2b266cb87e vmui: query history (#1732)
* feat: add query history

* fix: change detect keyUp for nav query history

* feat: set default query history

* app/vmselect/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-22 12:21:22 +03:00
Aliaksandr Valialkin
8991c8b589 lib/flagutil: do not expose sensitive info (passwords, keys and urls) at /flags page 2021-10-20 00:51:26 +03:00
Aliaksandr Valialkin
8ad95f0db7 lib/httpserver: expose command-line flags at /flags page
This should simplify debugging.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1695
2021-10-20 00:45:09 +03:00
Aliaksandr Valialkin
676ad70d9f lib/envflag: use flag.Set for setting the flags from env vars
This should make visible the set flags at flag.Visit(), which is used later for logging
and exporting the `is_set` label for these flags at /metrics page
2021-10-20 00:41:08 +03:00
Aliaksandr Valialkin
53bb58ed2a lib/storage: log a warning when the -storageDataPath has less than -storage.minFreeDiskSpaceBytes
This should improve the debuggability of the readonly feature.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1727
2021-10-19 23:59:13 +03:00
Roman Khavronenko
bdfac4ff53 vmalert: make group.ID() thread-safe (#1726)
Commit fixes potential race condition when group update
and generating of ID() happens simultaneously.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-19 16:44:13 +03:00
Roman Khavronenko
dcd881bb7a vmalert: properly init SIGHUP listener before starting group manager (#1725)
Regression was introduced during code refactoring. It potentially
could lead to situation when SIGHUP signals were ignored while
vmalert was still busy with initing group manager.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-19 16:35:27 +03:00
Aliaksandr Valialkin
b8123b862a app/vmauth: fix metric name prefixes: vmagent -> vmauth 2021-10-19 15:29:07 +03:00
Aliaksandr Valialkin
35a5eaeeb1 docs/Single-server-VictoriaMetrics.md: add a link to VMUI at VictoriaMetrics playground 2021-10-19 14:41:53 +03:00
Aliaksandr Valialkin
3408a05d12 lib/promscrape/discovery/kubernetes: log a warning if role: endpoints discovers more than 1000 targets per a single endpoint
In this case `role: endpointslice` must be used instead.

See the following references:

* https://kubernetes.io/docs/reference/labels-annotations-taints/#endpoints-kubernetes-io-over-capacity
* https://github.com/kubernetes/kubernetes/pull/99975
* https://github.com/prometheus/prometheus/issues/7572#issuecomment-934779398
2021-10-19 13:20:40 +03:00
Aliaksandr Valialkin
0d48b89afe docs/CHANGELOG.md: document 146a5b504c 2021-10-19 11:25:02 +03:00
Aliaksandr Valialkin
c64a134146 docs/CHANGELOG.md: document cbcc622786 2021-10-19 08:56:23 +03:00
Aliaksandr Valialkin
ec40affb59 deployment/docker/alerts.yml: formatting fixes after 865a60f13e 2021-10-19 08:53:03 +03:00
Nikolay
cbcc622786 changes job source for /target api (#1723)
use jobNameOriginal instead of relabeled as prometheus does

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1707
2021-10-19 08:49:36 +03:00
Roman Khavronenko
ea8f625b53 dashboards: add cardinality limiter panels for vmagent (#1720)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-18 19:15:33 +03:00
Yurii Kravets
865a60f13e Update alerts.yml
Added Series Limit day\hour alerts
2021-10-18 18:14:49 +03:00
Aliaksandr Valialkin
f744c1c6d9 vendor: return back the previous google.golang.org/genproto version, since the latest version leads to compile errors
The following errors:

    vendor/cloud.google.com/go/storage/storage.go:1447:53: o.GetCustomerEncryption().GetKeySha256 undefined (type *"google.golang.org/genproto/googleapis/storage/v2".Object_CustomerEncryption has no field or method GetKeySha256)
    vendor/cloud.google.com/go/storage/writer.go:439:10: q.GetCommittedSize undefined (type *"google.golang.org/genproto/googleapis/storage/v2".QueryWriteStatusResponse has no field or method GetCommittedSize)
2021-10-18 15:37:18 +03:00
Aliaksandr Valialkin
dea8521ab9 vendor: make vendor-update 2021-10-18 15:25:11 +03:00
Yury Molodov
a3e09a57c2 vmui: features (#1711)
* feat: initial uPlot graph

* feat: add zoom/pan for graph

* fix: add zoom by ctrl/mac

* fix: remove unused code

* feat: add toggle cache for fetch

* feat: add fix y-axis limits

* fix: stop point events while panning

* fix: change getting cursor position when scaling

* feat: add cursor tooltip to graph

* fix: uninstall chart.js

* fix: change link for create an issue

* fix: set default cache value to true

* app/vmalert: follow-up after 0e2486df56

* docs/CHANGELOG.md: document 5416e18007

* app/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-18 15:16:57 +03:00
Roman Khavronenko
146a5b504c vmalert: remove extra / from path in WEB interface (#1717)
The extra `/` may cause issues when additional path prefixes
are configured. Also, removing it makes it consistent
with the rest of declarations.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-18 15:12:47 +03:00
Roman Khavronenko
478854d36d vmctl: follow-up after 95d1d38595 (#1718)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-18 15:10:44 +03:00
Miro Prasil
5416e18007 vmctl influx convert bool to number (#1714)
vmctl: properly convert influx bools into integer representation

When using vmctl influx, the import would fail importing boolean fields
with:

```
failed to convert value "some".0 to float64: unexpected value type true
```

This converts `true` to `1` and `false` to `0`.

Fixes #1709
2021-10-18 10:29:34 +03:00
Alexander Rickardsson
0e2486df56 vmalert: add disablePathAppend to remote read (#1712)
* vmalert: add disablePathAppend to remoteRead

* docs: add docs for remoteRead.disablePathAppend
2021-10-18 10:24:52 +03:00
Alexander Rickardsson
c0e58ade45 vmalert: Redact passwords from error messages (#1713) 2021-10-18 10:20:26 +03:00
Aliaksandr Valialkin
da97e58979 app/vmselect/promql: randomize the static selection of time series returned from limitk()
Sort series by a hash calculated from the series labels. This should guarantee "random" selection of the returned time series.
Previously the selection could be biased, since time series were sorted alphabetically by label names and label values.
2021-10-16 21:16:49 +03:00
Aliaksandr Valialkin
c37f285466 lib/promscrape: set honor_timestamps: true by default if this option isn't set explicitly in scrape configs
This aligns the behavior to Prometheus - see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
2021-10-16 20:49:08 +03:00
Aliaksandr Valialkin
dfc719e012 docs/CaseStudies.md: add Smarkets case study 2021-10-16 19:57:23 +03:00
Aliaksandr Valialkin
a1e54fa2c9 docs/CaseStudies.md: add Fly.io case study 2021-10-16 19:45:44 +03:00
Aliaksandr Valialkin
47c6baf5ea docs/CaseStudies.md: add a case study for Razorpay 2021-10-16 19:36:33 +03:00
Aliaksandr Valialkin
3e9ffb6e33 docs/CaseStudies.md: add AbiosGaming 2021-10-16 19:26:04 +03:00
Aliaksandr Valialkin
ede9dd43e8 docs/CaseStudies.md: add Percona case study 2021-10-16 19:10:38 +03:00
Aliaksandr Valialkin
c055bc478c lib/promscrape: expose promscrape_series_limit_max_series and promscrape_series_limit_current_series metrics per each scrape target with the enabled unique series limiter 2021-10-16 18:47:13 +03:00
Aliaksandr Valialkin
9761b7f3ef vendor: update github.com/valyala/gozstd from v1.13.0 to v1.14.1
This should reduce memory usage in vmagent when compressing large scrape responses in stream parsing mode
2021-10-16 18:20:03 +03:00
Aliaksandr Valialkin
06b0982d6b lib/promscrape: always initialize http client for stream parsing mode
Stream parsing mode can be automatically enabled when scraping targets with big response bodies
exceeding the -promscrape.minResponseSizeForStreamParse , so it must be always initialized.
2021-10-16 13:18:23 +03:00
Aliaksandr Valialkin
cae174b11c app/vmselect/promql: typo fix in comment: didsn't -> didn't 2021-10-16 13:00:34 +03:00
Aliaksandr Valialkin
32793adbd9 lib/promscrape: store the last scraped response in compressed form if its size exceeds -promscrape.minResponseSizeForStreamParse
This should reduce memory usage when scraping targets with big response bodies.
2021-10-16 13:00:30 +03:00
Aliaksandr Valialkin
9866dd95c1 lib/promscrape: store the full response in stream parsing mode in scrapeWork.lastScrape byte slice
This allows sending staleness marks and properly calculate scrape_series_added metric in stream parsing mode
at the cost of the increased memory usage, since now the potentially big response is kept
in the lastScrape byte slice per each scrapeWork.

In practice the memory usage increase shouldn't be big, since the response size
is usually much smaller than the parsed metrics from this response after the relabeling,
which usually adds a big pile of target-specific labels per each metric.
2021-10-15 15:39:23 +03:00
Aliaksandr Valialkin
f6d33596ff lib/promscrape/discovery/kubernetes: rename endpointslices.go -> endpointslice.go in order to be consistent with EndpointSlice struct name
This is a follow-up for 31b42b30b6
2021-10-15 12:27:12 +03:00
Aliaksandr Valialkin
0db0410237 docs/FAQ.md: improve wording on why MetricsQL isn't 100% compatible with PromQL 2021-10-14 16:22:43 +03:00
Aliaksandr Valialkin
78425561ce docs/CHANGELOG.md: document the change at 7fcbd3fa4b 2021-10-14 14:37:44 +03:00
Aliaksandr Valialkin
1ac12597fa docs/FAQ.md: add an entry explaining why MetricsQL isn't 100% compatible with PromQL 2021-10-14 12:50:31 +03:00
Aliaksandr Valialkin
bbd34fa15e lib/promscrape: add -promscrape.minResponseSizeForStreamParse command-line option for automatic switching to stream parsing mode when scraping targets with big responses
This should reduce memory usage when vmagent scrapes targets with non-uniform response sizes.
This is common case in Kubernetes monitoring.
2021-10-14 12:29:35 +03:00
Aliaksandr Valialkin
1a7287c408 lib/promscrape: return error if sample_limit or series_limit options are set when stream parsing mode is enabled 2021-10-14 12:11:23 +03:00
Roman Khavronenko
7fcbd3fa4b Adjust http.Transport.MaxIdleConns setting for vmauth/vmalert services (#1704)
* vmalert: adjust `http.Transport.MaxIdleConns` value accordingly to `http.Transport.MaxIdleConnsPerHost`

`http.Transport.MaxIdleConnsPerHost` setting is controlled by `datasource.maxIdleConnections` flag,
while `http.Transport.MaxIdleConns` is inherited from DefaultTransport and is equal to `100`.
The fix adjusts `http.Transport.MaxIdleConns` value if it is lower than `http.Transport.MaxIdleConnsPerHost`.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* vmauth: adjust `http.Transport.MaxIdleConns` value accordingly to `http.Transport.MaxIdleConnsPerHost`

`http.Transport.MaxIdleConnsPerHost` setting is controlled by `maxIdleConnsPerBackend` flag,
while `http.Transport.MaxIdleConns` is inherited from DefaultTransport and is equal to `100`.
The fix adjusts `http.Transport.MaxIdleConns` value if it is lower than `http.Transport.MaxIdleConnsPerHost`.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-10-13 17:29:28 +03:00
Aliaksandr Valialkin
1c17fe70e0 docs/CHANGELOG.md: document e3c8304deb 2021-10-13 16:00:50 +03:00
Aliaksandr Valialkin
e3c8304deb lib/promscrape: add ability to show the original labels for discovered targets at /targets page
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1698
2021-10-13 15:59:58 +03:00
Roman Khavronenko
8df3c569c7 vmalert: add Source link to alerts UI (#1701)
The source link is controlled by `external.url` and `external.alert.source`
flags, in the same way as for alertmanager notifications.
The source link is added to Alerts list view, and specific Alert view.
2021-10-13 15:25:11 +03:00
Aliaksandr Valialkin
3d61a10367 docs/MetricsQL.md: add missing blank line before the link to github.com/VictoriaMetrics/metricsql package 2021-10-13 15:10:54 +03:00
Roman Khavronenko
c0a932a55f lib/promscrape: make errcheck happy (#1703) 2021-10-13 14:57:30 +03:00
Aliaksandr Valialkin
9882cda8b9 lib/promscrape: shard targets among cluster nodes after relabeling is applied
This guarantees that targets with the same set of labels go to the same vmagent node.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1687#issuecomment-940629495
2021-10-12 17:06:00 +03:00
Aliaksandr Valialkin
5a58c041c2 app/vmagent: expose -promscrape.config contents at /config page as Prometheus does
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1695
2021-10-12 16:25:37 +03:00
Aliaksandr Valialkin
4f242980be docs/FAQ.md: add a solution against high churn rate 2021-10-12 10:32:39 +03:00
Aliaksandr Valialkin
1eaaf8ad51 vendor: make vendor-update 2021-10-11 21:51:44 +03:00
Roman Khavronenko
2c6d86226f docs: mention "PromQL compliance" in MetricsQL docs (#1691) 2021-10-11 21:21:29 +03:00
Aliaksandr Valialkin
a5001b9c20 app/vmselect/promql: add atan2 binary operator, which is going to be added in Prometheus 2.31
See https://github.com/prometheus/prometheus/pull/9248
2021-10-11 21:15:53 +03:00
Aliaksandr Valialkin
81c6720392 app/vmselect/promql: add missing trigonometric functions, which are going to be added in Prometheus 2.31
See https://github.com/prometheus/prometheus/issues/9233
2021-10-11 21:01:33 +03:00
Aliaksandr Valialkin
8679ba71dd docs/MetricsQL.md: clarify docs for union() function 2021-10-11 17:40:44 +03:00
Aliaksandr Valialkin
873aac584e lib/promscrape: use Prometheus format for target labels at /targets page
This should simplify copy-pasting the labels to/from PromQL / MetricsQL
2021-10-11 12:41:37 +03:00
Denys Holius
dd4038f0e5 Added some fixes (#1690)
* removed not needed description

* added some fixes and fixed typos
2021-10-11 11:21:07 +03:00
Aliaksandr Valialkin
986bed8261 docs/MetricsQL.md: add a link to https://medium.com/@romanhavronenko/victoriametrics-promql-compliance-d4318203f51e 2021-10-11 11:00:56 +03:00
Roman Khavronenko
9b557a88fc docs: add "PromQL compliance" article (#1689)
* docs: add "PromQL compliance" article

* Update docs/Articles.md

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-10-11 10:58:34 +03:00
Aliaksandr Valialkin
83a2a9f2f7 deployment/docker/docker-compose.yml: upgrade Grafana from v8.1.2 to v8.2.0 2021-10-08 20:37:40 +03:00
Aliaksandr Valialkin
92b92d4d2c app/vmselect/promql: consistently return the same set of time series from limitk() function
This is the expected behaviour by most users.
2021-10-08 19:53:52 +03:00
Aliaksandr Valialkin
001750c239 lib/storage: fix unaligned access on 32-bit architectures.
The bug has been introduced at a171916ef5
2021-10-08 19:43:03 +03:00
Denys Holius
4b0cefc4bd Added fixes and improvements (#1677)
* added guide for VM operator

* Update docs/guides/getting-started-with-vm-operator.md

Co-authored-by: Roman Khavronenko <hagen1778@gmail.com>

* Update docs/guides/getting-started-with-vm-operator.md

Co-authored-by: Roman Khavronenko <hagen1778@gmail.com>

* Fixed different typos and added improvements from proposals

* move remoteWrite.url to other place

* fixed typo

* rephrased vminsert explanation

* remove not needed parameters for default setup

Co-authored-by: Roman Khavronenko <hagen1778@gmail.com>
2021-10-08 18:57:36 +03:00
Aliaksandr Valialkin
00fe5230e9 deployment/docker: update Go builder version from Go1.17.1 to Go1.17.2
See https://github.com/golang/go/issues?q=milestone%3AGo1.17.2+label%3ACherryPickApproved
2021-10-08 17:42:57 +03:00
Aliaksandr Valialkin
6058edb0d1 vendor: make vendor-update 2021-10-08 16:04:56 +03:00
Aliaksandr Valialkin
0a3a774202 docs/CHANGELOG.md: cut v1.67.0 2021-10-08 16:00:33 +03:00
Aliaksandr Valialkin
0ff8fcac6a app/vmui: follow-up after 7bfb44113e
* Run `vmui-update`
* Document the changes in README.md and CHANGELOG.md
2021-10-08 15:09:29 +03:00
Yury Molodov
7bfb44113e vmui: use uPlot as default engine for graph (#1683)
* feat: initial uPlot graph

* feat: add zoom/pan for graph

* fix: add zoom by ctrl/mac

* fix: remove unused code
2021-10-08 15:07:35 +03:00
Aliaksandr Valialkin
cf5cbd1c70 app/{vminsert,vmstorage}: follow-up after a171916ef5
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269
2021-10-08 14:35:49 +03:00
Nikolay
4290b46e8c Adds read-only mode for vmstorage node (#1680)
* adds read-only mode for vmstorage
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269

* changes order a bit

* moves isFreeDiskLimitReached var to storage struct
renames functions to be consistent
change protoparser api - with optional storage limit check for given opened storage

* renames freeSpaceLimit to ReadOnly
2021-10-08 14:35:48 +03:00
Aliaksandr Valialkin
2748255c8b app/vmselect/promql: substitute rollupFuncsCannotAdjustWindow with rollupFuncsCanAdjustWindow
The list of functions, which can adjust lookbehind window is more limited than the rest of functions,
so it is better from maintainability and readability PoV using the allowlist instead of blocklist.
2021-10-07 13:18:42 +03:00
Aliaksandr Valialkin
c45210a6f9 app/vmselect/promql: return back the behaviour for deriv() function when the lookbehind window doesn't contain enough points
It is expected that the `deriv(m[d])` returns non-empty value if the lookbehind window `d`
contains less than 2 samples in the same way as `rate()` does.

This is a follow-up after 3e084be06b .
2021-10-07 12:52:27 +03:00
Roman Khavronenko
3e084be06b app/vmselect: make predict_linear and deriv compatible with Prometheus (#1681)
Previously, `predict_linear` returned slightly different results comparing
to Prometheus. The change makes linear regression algorithm compatible
with Prometheus.

`deriv` was excluded from the list of functions which can adjust the time
window for the same reasons.
2021-10-07 12:50:49 +03:00
Aliaksandr Valialkin
a19e7c7ce8 app/vminsert: fix uneven distribution of time series among storage nodes
Use distinct seed for distribution hash calculations on the second level of vminsert nodes.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1672
2021-10-07 12:23:39 +03:00
Aliaksandr Valialkin
a71c9ad650 docs/guides: follow-up after 05a1396247 2021-10-06 14:46:46 +03:00
Thomas Danielsson
05a1396247 fix: typo metric_relabel_configs (#1674)
metric_ralabel_configs -> metric_relabel_configs
2021-10-06 14:45:39 +03:00
Ziqi Zhao
402c995d6d fix some typos (#1678)
Co-authored-by: 柘远 <zzq237937@alibaba-inc.com>
2021-10-06 14:43:10 +03:00
Aliaksandr Valialkin
ec3a87bb46 vendor: make vendor-update 2021-10-05 10:29:12 +03:00
Aliaksandr Valialkin
c7c966d0e9 docs/vmagent.md: update docs after 3e9a939a990c8b608414388c96f68eb062364ae7 2021-10-05 10:23:33 +03:00
Aliaksandr Valialkin
3dea9e02d0 vendor: make vendor-update 2021-09-30 17:52:02 +03:00
Aliaksandr Valialkin
9515e58e28 docs/vmagent.md: document how to write data to Kafka 2021-09-30 17:45:53 +03:00
Aliaksandr Valialkin
6ee66fb6b1 lib/promscrape: reduce memory allocations in mergeLabels() after 48e3e6c8df 2021-09-30 16:56:12 +03:00
Aliaksandr Valialkin
0e3de5a0cc app/vmselect/promql: add topk_last and bottomk_last functions 2021-09-30 13:22:52 +03:00
Roman Khavronenko
a31407006c app/vmselect: fix binary comparison func (#1667)
The fix makes the binary comparison func to check for NaNs
before executing the actual comparison. This prevents VM
from returning values for non-existing samples for expressions
which contain bool comparisons. Please see added test
for example.
2021-09-30 12:24:17 +03:00
Roman Khavronenko
344490d89b app/vmselect: fix testRowsEqual func NaN checks (#1666)
It appeared, that `testRowsEqual` NaN comparison was incorrect.
The fix caused some tests to fail. Please see the change and
tests updated.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-30 12:08:47 +03:00
Aliaksandr Valialkin
463a5bf76e lib/protoparser: go fmt 2021-09-29 21:19:00 +03:00
Aliaksandr Valialkin
6d9f1d4227 app/vminsert: document that -relabelConfig is reloaded on SIGHUP signal 2021-09-29 21:18:58 +03:00
Aliaksandr Valialkin
58964d52a5 lib/protoparser/prometheus: compare invalid Prometheus lines in full 2021-09-29 19:41:28 +03:00
Aliaksandr Valialkin
2b623ae302 docs/CHANGELOG.md: link to Kafka integration docs 2021-09-29 12:31:23 +03:00
Aliaksandr Valialkin
d80d72efec app/{vmbackup,vmrestore}: switch from gcs://... to gs://... urls for backups to GCS
The `gs://` urls are commonly used, so prefer them instead of `gcs://` urls,
while leaving support for `gcs://` urls for backwards compatibility.
2021-09-29 12:10:29 +03:00
Aliaksandr Valialkin
396e233ac1 docs/vmagent.md: update Telegraf config in the section about Kafka 2021-09-29 11:21:15 +03:00
Aliaksandr Valialkin
0e5ab52908 docs/vmagent.md: add docs about reading metrics from Kafka 2021-09-29 01:46:12 +03:00
Yury Molodov
893af0a92c vmui: fixed bug with time range (time zone) (#1661)
* fix: set date in query string in utc format

* app/vmselect: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-09-29 01:00:44 +03:00
Nikolay
cc72f9428d changes vmagent api (#1656)
* changes vmagent api
adds auth.Token to promremotewrite InsertHandlerReader
changes remoteWrite client constructor, allows to use multiple remoteWriteUrl schemes, like kafka://
changes url path concatenation for tenant remoteWrite

Update app/vmagent/remotewrite/client.go

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>

* Update app/vmagent/remotewrite/remotewrite.go

* Apply suggestions from code review

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-09-29 00:52:07 +03:00
Roman Khavronenko
5dc84bf210 app/vmselect: disable time-window adjustment for min/max_over_time funcs (#1658)
Adjustment results into discrepancy between Prometheus and VM on time windows
smaller than scrape interval.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-29 00:43:21 +03:00
Aliaksandr Valialkin
ead59bdebf docs/CHANGELOG.md: document the bugfix from de810031bf 2021-09-29 00:41:35 +03:00
Roman Khavronenko
de810031bf app/vmselect: always return zero for stddev func if there is only one value (#1659)
The fix will always return zero if received set of items consists of one
element only, which also means no deviation.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-29 00:38:55 +03:00
Roman Khavronenko
dd536b475c app/vmselect: return NaN instead of 0 for empty value sets (#1660)
The change affects `count/stddev/stdvar_over_time` funcs and makes
them to return NaN instead of zero when there is no datapoints
in a time window.
This is needed for improving compatibility with Prometheus.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-29 00:37:04 +03:00
Roman Khavronenko
03cd93bf1a app/vmselect: rm quantile_over_time fast-path optimisations (#1662)
The removed fast path optimisations weren't consistent with
`quantile` function behavior and results into discrepancy.
Specifically, results didn't match in cases when:
* phi < 0 or phi > 1;
* values contain only one element.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-29 00:35:14 +03:00
Aliaksandr Valialkin
50ec259750 docs/CHANGELOG.md: document 3d17112a7e 2021-09-29 00:33:08 +03:00
Nikolay
3d17112a7e changes auth validation for openstack (#1663)
* changes auth validation for openstack
must fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1655

* Apply suggestions from code review

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-09-29 00:28:49 +03:00
Aliaksandr Valialkin
91b3c601bc app/{vminsert,vmagent}: add ability to ingest data via DataDog "submit metrics" API
See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/206
2021-09-29 00:13:08 +03:00
Yury Molodov
a64155d91e vmui: use Chart.js as default engine for graph (#1634)
* feat: add Plotly as default engine for graph

* fix: remove unused components

* feat: use Chart.js as default engine graph

* fix: correct styles for loader

* feat: add zoom/pan for chart

* feat: add height for chart

* fix: remove unused code

* fix: remove empty units from duration

* fix: change debounce for pan to 500ms

* fix: add utility for plugins register globally

* fix: optimize render graph

* feat: add buffer data for zoom

* fix: add limits for zoom in/out

* fix: change update data while zooming

* app/vmselect: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-09-27 22:26:14 +03:00
Aliaksandr Valialkin
8c0283381d app/victoria-metrics/testdata/graphite/max_lookback_unset.json: fix the test after c4c77aa2dd
The commit c4c77aa2dd slightly changed how scrape_interval is detected per-time series,
so the max_lookback_unset test should be updated accordingly.
2021-09-27 21:41:14 +03:00
Aliaksandr Valialkin
2efe0acfc9 app/vmselect/promql: add rollup_scrape_interval(m[d]) function
It calculates the min, max and avg scrape intervals for m over the given lookbehind window d
2021-09-27 19:21:24 +03:00
Aliaksandr Valialkin
c4c77aa2dd app/vmselect/promql: follow-up after 526dd93b32
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1625
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1612
2021-09-27 18:55:39 +03:00
Roman Khavronenko
526dd93b32 app/vmselect: quantile func compatibility with Prometheus (#1646)
* app/vmselect: `quantile` func compatibility with Prometheus

The `quantile` func was previously calculated by https://github.com/valyala/histogram
package. The result of such calculation was always the closest real value to
requested quantile. While in Prometheus implementation interpolation is used.
Such difference may result into discrepancy in output between Prometheus and
VictoriaMetrics.

This commit adds a Prometheus-like `quantile` function. It also used by other
functions which depend on it, such as `quantiles`, `quantile_over_time`, `median` etc.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1625

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* app/vmselect: `quantile` review fixes

* quantile functions were split into multiple to provide
different API for already sorted data;
* float64sPool is used for reducing allocations. Items in pool may have
different sizes, but defining a new pool was complicated due to name collisions;

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-27 18:02:41 +03:00
Aliaksandr Valialkin
80b0b92d2f vendor: make vendor-update 2021-09-27 17:57:40 +03:00
Aliaksandr Valialkin
1b23224f9c docs/BestPractices.md
docs/BestPractices.md: update the doc
2021-09-27 17:51:21 +03:00
Aliaksandr Valialkin
8ed95e82c6 app/vmselect/promql: follow-up after 57b3320478 2021-09-24 01:24:18 +03:00
Roman Khavronenko
57b3320478 app/vmselect: make sorting for query result similar to Prometheus (#1647)
* app/vmselect: make sorting for query result similar to Prometheus

Updated sorting allows to get the order of series in result similar or equal
to what Prometheus returns.
The change is needed for compatibility reasons.

* Update app/vmselect/promql/exec_test.go

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2021-09-24 01:03:12 +03:00
Aliaksandr Valialkin
e564411a62 app/vmselect/promql: align the behavior of or, and and unless operators with on (labels) modifier to Prometheus
Previously VictoriaMetrics could return unexpected result if the right-hand side operand
had multiple time series with the given set of labels mentioned in `on(labels)`.

See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1643
2021-09-24 00:46:25 +03:00
Nikolay
3f1e6da1d7 moves prod images build into alpine container with musl (#1640)
adds gcc and musl-dev to builder container
2021-09-24 00:14:11 +03:00
Aliaksandr Valialkin
9f19649672 docs/CHANGELOG.md: cut v1.66.2 2021-09-23 22:53:36 +03:00
Aliaksandr Valialkin
718eca33ab lib/storage: properly handle {__name__=~"prefix(suffix1|suffix2)",other_label="..."} queries
They were broken in the commit 00cbb099b6

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1644
2021-09-23 21:48:51 +03:00
Aliaksandr Valialkin
c5bb95a417 docs: make docs-sync 2021-09-23 20:51:35 +03:00
Aliaksandr Valialkin
f5896b7420 docs/CHANGELOG.md: document 0e35fc9538
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1641
2021-09-23 20:46:23 +03:00
Roman Khavronenko
9dc4d16664 app/vmctl: fix misleading comment about cluster version for native mode (#1648)
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1637
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-23 17:57:25 +03:00
Roman Khavronenko
0e35fc9538 app/vmalert: remove unnecessary omitempty tag for interval param (#1649)
`omitempty` tag resulted into skipping this param on marshaling,
which was used as a checksum for groups configuration. Since on
config reload checksums are compared before applying changes,
any change to `interval` only didn't trigger config reload.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1641
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2021-09-23 17:55:59 +03:00
Aaron France
6f061dab19 fix: typo in vmagent.md (#1642) 2021-09-23 16:51:02 +03:00
Aliaksandr Valialkin
176348cbcc vendor: make vendor-update 2021-09-23 15:05:27 +03:00
Aliaksandr Valialkin
a0313c046b lib/promscrape: add vm_promscrape_max_scrape_size_exceeded_errors_total metric for counting of the failed scrapes due to the exceeded response size
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1639
2021-09-23 14:47:54 +03:00
Aliaksandr Valialkin
d5c2741e8f docs/CaseStudies.md: add a case study for Grammarly 2021-09-23 13:11:48 +03:00
Aliaksandr Valialkin
73cd74075d docs/Articles.md: add https://cer6erus.medium.com/superset-bi-with-victoria-metrics-a109d3e91bc6 2021-09-23 10:43:29 +03:00
Aliaksandr Valialkin
00277583f9 vendor: update github.com/valyala/gozstd from v1.12.0 to v1.13.0 2021-09-22 20:06:44 +03:00
Aliaksandr Valialkin
99a6c212e8 docs/CaseStudies.md: fix a link to third-party articles 2021-09-22 03:52:35 +03:00
Aliaksandr Valialkin
9b3d1a1996 docs/CHANGELOG.md: cut v1.66.1 2021-09-22 01:47:05 +03:00
Aliaksandr Valialkin
a13c3de36f docs/CHANGELOG.md: document 9ca1cbced1
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1635
2021-09-21 23:17:08 +03:00
Aliaksandr Valialkin
9ca1cbced1 lib/httpserver: add -enterprise and/or -cluster suffixes to short_version label of vm_app_version metric
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1635
2021-09-21 23:12:42 +03:00
Aliaksandr Valialkin
207c5760ce lib/promrelabel: fix parsing regex: true in relabeling rules 2021-09-21 23:00:53 +03:00
Aliaksandr Valialkin
9884a55f3c vendor: temporarily stick to v0.93.3 for cloud.google.com/go until the binary size bloat issue is resolved
This returns back VictoriaMetrics binary size from 24Mb to 18Mb.

See https://github.com/googleapis/google-cloud-go/issues/4783
2021-09-21 18:13:47 +03:00
Roman Khavronenko
ac1abe2faf app/vmalert: support http.pathPrefix flag in UI (#1636)
The change makes UI to respect `http.pathPrefix` flag
for API or navigation items links.
2021-09-21 14:41:01 +03:00
Aliaksandr Valialkin
a22aa0608b app/vmselect: fix accessing /graphite/* endpoints 2021-09-21 13:56:35 +03:00
Aliaksandr Valialkin
94148d5ad7 docs/vmagent.md: typo fix 2021-09-20 16:49:20 +03:00
Aliaksandr Valialkin
51657b1e04 docs/vmagent.md: typo fixes in Prometheus staleness markers docs 2021-09-20 16:44:09 +03:00
Aliaksandr Valialkin
76811c2f60 docs/CHANGELOG.md: cut v1.66.0 2021-09-20 15:20:25 +03:00
Nikolay
ad08d9dfc0 changes protoparser apis for accepting reading from io.Reader (#1624)
adds InsertHandlerForReader apis to vmagent
2021-09-20 14:49:28 +03:00
Aliaksandr Valialkin
15ea4c6dae vendor: make vendor-update 2021-09-20 14:38:55 +03:00
n4mine
1ac8d55147 fix: typo, dddresses -> addresses (#1630) 2021-09-20 14:28:59 +03:00
Aliaksandr Valialkin
a06ff456f8 docs/Articles.md: add a link to Open-source strategy at VictoriaMetrics 2021-09-20 11:46:10 +03:00
Aliaksandr Valialkin
9a3d0c43b5 app/vmselect/promql: add quantiles_over_time("phiLabel", phi1, ..., phiN, m[d]) function for calculating multiple quantiles at once 2021-09-17 23:35:10 +03:00
Aliaksandr Valialkin
e1e5a20b36 docs/CHANGELOG.md: document 0e09fdb8b0 2021-09-17 18:47:06 +03:00
Nikolay
0e09fdb8b0 makes filters optional for ec2 api requests (#1627)
filters can be applied only for DescribeInstances requests, like prometheus does.
related issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1626
2021-09-17 18:00:37 +03:00
Aliaksandr Valialkin
2951dd0a57 app/vmselect/promql: add histogram_quantiles("phiLabel", phi1, ..., phiN, buckets) function
This function calculates multiple quantiles over the given buckets at once

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1573
2021-09-17 13:32:39 +03:00
Aliaksandr Valialkin
8c504d6efa docs/CHANGELOG.md: document the change in enterprise apps, which allows passing -version without -eula flag
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1621
2021-09-17 12:37:45 +03:00
Aliaksandr Valialkin
5a44be0e52 app/vmselect/promql: optimize quantiles() calculation
Calculate quantiles in one go instead of calculating each quantile individually
2021-09-17 12:33:42 +03:00
Aliaksandr Valialkin
948fb638f5 docs/FAQ.md: extend VictoriaMetrics vs TimescaleDB section with real user experience
See also https://github.com/timescale/promscale/issues/427 , which is mentioned in the https://abiosgaming.com/press/high-cardinality-aggregations/
2021-09-16 20:46:25 +03:00
Roman Khavronenko
b75455c650 vmalert: add new metric vmalert_remotewrite_flush_duration_seconds (#1622) 2021-09-16 14:00:16 +03:00
Roman Khavronenko
f83fa31985 docs: fix indentation for FAQ document (#1620) 2021-09-16 13:59:22 +03:00
f41gh7
9375b60c5f adds stub for functions api 2021-09-16 13:49:52 +03:00
Aliaksandr Valialkin
e60dfc96ff app/vmselect/promql: add mad(q) and outliers_mad(tolerance, q) functions to MetricsQL 2021-09-16 13:33:53 +03:00
Aliaksandr Valialkin
eca75cc650 app/vmselect/prometheus: make more clear log messages for errors during sending data to remote clients 2021-09-16 12:56:58 +03:00
Aliaksandr Valialkin
26cd0d36b4 vendor: make vendor-update 2021-09-15 18:22:59 +03:00
Aliaksandr Valialkin
44b01fff13 app/{vminsert,vmselect}: automatically add missing port in -storageNode lists passed to vminsert and vmselect
This should simplify manual setup of the cluster according to https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-setup
2021-09-15 18:08:30 +03:00
Aliaksandr Valialkin
06ed694ad9 docs/CHANGELOG.md: document 777ff75874 2021-09-15 17:45:08 +03:00
Aliaksandr Valialkin
2f86d4cf38 app/vmui: follow-up after 777ff75874
The commit contains the following changes:

- Show vmui when requesting /graph page in order to be compatible with Prometheus datasource in Grafana.
- Properly encode query args at vmui url.
- Set the number of points on the graph to the number of horizontal pixels divided by 2. Previously it was hardcoded to 30.
- Do not save server url to persistent storage at browser, since it should be always obtained from the url.
- Run `make vmui-update` for updating vmui embedded into VictoriaMetrics.
2021-09-15 17:40:48 +03:00
Yury Molodov
777ff75874 vmui: change query params compatible with prometheus (#1619)
* feat: change url params for compatible prometheus

* style: add comment for TimeParams

* fix: change get default server for single version

* fix: change function for get query string value
2021-09-15 09:42:49 +03:00
Aliaksandr Valialkin
cf9efde50c vendor: update github.com/valyala/quicktemplate from v1.6.3. to v1.7.0 2021-09-15 09:34:07 +03:00
Aliaksandr Valialkin
3cba77765a vendor: update github.com/VictoriaMetrics/fastcache from v1.6.0 to v1.7.0 2021-09-15 09:34:07 +03:00
Aliaksandr Valialkin
77682f516a vendor: update github.com/VictoriaMetrics/fasthttp from v1.0.16 to v1.1.0 2021-09-15 09:34:07 +03:00
Aliaksandr Valialkin
68ea3d18f7 vendor: update github.com/valyala/histogram from v1.1.2 to v1.2.0
This fixes the non-repeatable quantile_over_time() results when the number of input samples exceeds 1000.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1612
2021-09-15 09:34:07 +03:00
Roman Khavronenko
ecd3069b6c vmalert: create basic auth config only if args aren't empty (#1618)
* vmalert: create basic auth config only if args aren't empty

follow-up after 68721f6

* vmalert: make lint happy
2021-09-15 01:53:31 +03:00
Roman Khavronenko
84b41e498f docs: add "Choosing a Time Series Database for High Cardinality Aggregations" article (#1617) 2021-09-15 01:51:56 +03:00
Aliaksandr Valialkin
3e1683756b docs/vmalert.md: follow-up after 68721f6e7d
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1608
2021-09-14 14:47:47 +03:00
Roman Khavronenko
68721f6e7d vmalert: support bearer token for datasource, remotewrite and remoteread (#1614)
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1608
2021-09-14 14:32:06 +03:00
Dima Lazerka
56069a3022 Remove port 3000 and add https to play-grafana link (#1616)
* Remove port 3000 and add https to play-grafana link

* Fix typo

Co-authored-by: Dzmitry Lazerka <dlazerka@gmail.com>
2021-09-14 14:24:30 +03:00
Aliaksandr Valialkin
8f685d81c6 lib/storage: follow up after 00cbb099b6 2021-09-14 14:16:25 +03:00
faceair
00cbb099b6 lib/storage: optimize convert multiple values regexp filter to composite tag filter (#1610)
* lib/storage: optimize convert multiple values regexp filter to composite tag filter

* Apply suggestions from code review

Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com>
2021-09-14 12:47:07 +03:00
dependabot[bot]
bc2d05be8e build(deps): bump codecov/codecov-action from 2.0.3 to 2.1.0 (#1615)
Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 2.0.3 to 2.1.0.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/codecov/codecov-action/compare/v2.0.3...v2.1.0)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-14 12:23:01 +03:00
Aliaksandr Valialkin
adedc83b3b app/vmauth: do not log invalid auth tokens by default for security reasons
The logging can be enabled by passing `-logInvalidAuthTokens` command-line flag to vmauth
2021-09-14 12:20:03 +03:00
Aliaksandr Valialkin
e46bd9e47f docs/Single-server-VictoriaMetrics.md: link to cardinality limiter docs in vmagent 2021-09-13 21:26:59 +03:00
Aliaksandr Valialkin
07b9c7994f docs/vmagent.md: mention out of order sample errors, which are typically emitted by Thanos, Cortex or Prometheus 2021-09-13 19:36:31 +03:00
Aliaksandr Valialkin
8a6a36429a app/vminsert/netstorage: disable rerouting by default
Production clusters work more stable with the disabled rerouting during rolling restarts and/or
during spikes in time series churn rate. So it would be better disabling the rerouting by default.

The re-routing can be enabled by passing `-disableRerouting=false` command-line flag to `vminsert` nodes.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/791
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1054
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1165
2021-09-13 18:51:56 +03:00
Aliaksandr Valialkin
c4f11a49f8 docs/CHANGELOG.md: document 5494bc02a6 2021-09-13 17:11:23 +03:00
Aliaksandr Valialkin
7f0a8d4bdb docs: consistency renaming: Influx -> InfluxDB 2021-09-13 17:05:16 +03:00
Aliaksandr Valialkin
143a3b34ee app/vmui/Dockerfile-web: update Go builder from 1.16.7 to 1.17.1 and Alpine base image from 3.14.1 to 3.14.2 2021-09-13 17:05:16 +03:00
Roman Khavronenko
5494bc02a6 vmalert: add flag to limit the max value for auto-resolve duration for alerts (#1609)
* vmalert: add flag to limit the max value for auto-resolve duration for alerts

The new flag `rule.maxResolveDuration` is supposed to limit max value for
alert.End param, which is used by notifiers like Alertmanager for alerts auto resolve.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1586
2021-09-13 15:48:18 +03:00
Aliaksandr Valialkin
b9727a36dc docs/vmbackup.md: update the outdated link to vmbackupmanager 2021-09-13 14:16:53 +03:00
Roman Khavronenko
75f35c3b11 vmalert: display extra filter labels in UI (#1613) 2021-09-13 14:11:38 +03:00
Aliaksandr Valialkin
d1a16e0891 app/vmselect/promql: use Prometheus-compatible label value formatting for count_values function 2021-09-13 13:48:06 +03:00
Aliaksandr Valialkin
fb6ed0ce19 lib/promscrape/discovery/docker: support host networking mode
See https://github.com/prometheus/prometheus/issues/9116
2021-09-13 13:30:16 +03:00
Aliaksandr Valialkin
6295861acd lib/promscrape/discovery/kubernetes: properly use https scheme for wildcard TLS certificates in ingress target discovery 2021-09-13 13:03:42 +03:00
Aliaksandr Valialkin
2814388891 vendor: make vendor-update 2021-09-12 15:26:44 +03:00
Aliaksandr Valialkin
2394b5018b deployment/docker: update Go builder from v1.17.0 to v1.17.1
See https://github.com/golang/go/issues?q=milestone%3AGo1.17.1+label%3ACherryPickApproved
2021-09-12 15:23:53 +03:00
Aliaksandr Valialkin
728c4c3841 lib/promscrape: generate scrape_timeout_seconds metric per each scrape target in the same way as Prometheus 2.30 does
See https://github.com/prometheus/prometheus/pull/9247
2021-09-12 15:20:44 +03:00
Aliaksandr Valialkin
0b4eb0fa7d lib/promscrape: make fmt 2021-09-12 13:34:15 +03:00
Aliaksandr Valialkin
48e3e6c8df lib/promscrape: add ability to configure scrape_timeout and scrape_interval via relabeling
See https://github.com/prometheus/prometheus/pull/8911
2021-09-12 13:33:41 +03:00
Aliaksandr Valialkin
f3e89754a9 lib/promscrape: reduce CPU usage for common case when calculating scrape_series_added metric
Also reduce CPU usage when applying `series_limit` to scrape targets with constant set of metrics.

The main idea is to perform the calculations on scrape_series_added and series_limit
only if the set of metrics exposed by the target has been changed.
Scrape targets rarely change the set of exposed metrics,
so this optimization should reduce CPU usage in general case.
2021-09-12 12:53:14 +03:00
Aliaksandr Valialkin
674a6eee6c docs/Single-server-VictoriaMetrics.md: refer to relabeling section for vmagent
This removes duplicate docs about additional relabeling actions supported by VictoriaMetrics components
2021-09-12 11:39:19 +03:00
Aliaksandr Valialkin
77168e3e94 docs/vmagent.md: sync with app/vmagent/README.md by running make docs-sync 2021-09-11 11:04:42 +03:00
Aliaksandr Valialkin
cebcb15ba4 lib/storage: verify that the tsidsFound contain the needed tsids in tests added at f4dead529f 2021-09-11 10:57:13 +03:00
Aliaksandr Valialkin
9286107e82 lib/promscrape: send stale markers for disappeared metrics like Prometheus does 2021-09-11 10:51:04 +03:00
Aliaksandr Valialkin
cfed015bb6 docs/vmalert.md: typo fix in Multitenancy chapter 2021-09-10 17:57:14 +03:00
Aliaksandr Valialkin
f4dead529f lib/storage: properly search series by multiple tag filters matching empty labels such as foo{bar=~"baz|",x=~"y|"}
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395
2021-09-09 21:09:21 +03:00
Aliaksandr Valialkin
ea943911bc app/vmselect/promql: keep metric name in rollup_candlestick results, since they don't change the original series meaning
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1600
2021-09-09 19:21:18 +03:00
Aliaksandr Valialkin
f27980dcb3 docs/CHANGELOG.md: typo fix 2021-09-09 18:57:30 +03:00
Aliaksandr Valialkin
4aeb8db83f lib/promscrape: add ability to set series_limit and stream_parse options via relabeling
This allows managing these options on a per-target basis.

Typical use case: to manage these options for pods via Kubernetes annotations.
2021-09-09 18:49:39 +03:00
Aliaksandr Valialkin
468f941f7e lib/promscrape: add the actual job name to the labels of promscrape_series_limit_rows_dropped_total metric 2021-09-09 17:37:37 +03:00
Aliaksandr Valialkin
086b5d0cf1 lib/promscrape: add scrape_ prefix to job and target labels exported by promscrape_series_limit_rows_dropped_total metric
This is needed in order to prevent from possible clash with the corresponding (job, target) labels for the job, which scrapes this metric.
2021-09-09 17:29:21 +03:00
Aliaksandr Valialkin
d2708a1fb7 docs/vmagent.md: typo fix in Relabeling chapter 2021-09-09 16:39:40 +03:00
Denys Holius
abba6e8370 Bump alpine linux to latest (#1607) 2021-09-09 16:29:15 +03:00
Aliaksandr Valialkin
d6bd956930 lib/promrelabel: add keep_metrics and drop_metrics actions to relabeling rules
These actions simplify metrics filtering. For example,

- action: keep_metrics
  regex: 'foo|bar|baz'

would leave only metrics with `foo`, `bar` and `baz` names, while the rest of metrics will be deleted.

The commit also makes possible to split long regexps into multiple lines. For example, the following config is equivalent to the config above:

- action: keep_metrics
  regex:
  - foo
  - bar
  - baz
2021-09-09 16:18:21 +03:00
Aliaksandr Valialkin
3a827b98cd docs/vmalert.md: make docs-sync after 21f022e5f0 2021-09-09 16:16:25 +03:00
Aliaksandr Valialkin
a8053d9fc6 docs/MetricsQL.md: add a link to VictoriaMetrics github 2021-09-08 00:14:59 +03:00
Aliaksandr Valialkin
e84fa9eb38 app/vmalert: document GroupAlerts
This makes golint happy
2021-09-07 22:50:08 +03:00
Aliaksandr Valialkin
e6c9869d86 app/vmalert: follow-up after 21f022e5f0 2021-09-07 22:43:37 +03:00
Roman Khavronenko
21f022e5f0 vmalert: add initial UI implementation (#1602)
New UI pages:
/ - welcome page with API handlers list;
/groups - list of all rules per group;
/alerts - list of all active alerts;
/groupID/alertID/status - status of the active alert;
2021-09-07 22:39:22 +03:00
Aliaksandr Valialkin
6fbaf8f978 docs/CHANGELOG.md: document 42e07cfaea 2021-09-07 22:34:39 +03:00
dependabot[bot]
0c7110d1a5 build(deps): bump github.com/aws/aws-sdk-go from 1.40.34 to 1.40.37 (#1598)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.34 to 1.40.37.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.34...v1.40.37)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-07 20:55:49 +03:00
Aliaksandr Valialkin
e34f7081d0 docs/vmagent.md: add API path for Prometheus text exposition format 2021-09-07 16:14:51 +03:00
Aliaksandr Valialkin
0166eae7c4 docs/Articles.md: add a link to case studies 2021-09-06 10:57:59 +03:00
Aliaksandr Valialkin
5e3ef376b5 docs/Articles.md: add an url to https://www.vultr.com/docs/install-and-configure-victoriametrics-on-debian 2021-09-03 11:49:37 +03:00
Aliaksandr Valialkin
ef27786e37 docs/Single-server-VictoriaMetrics.md: fix a link to multitenancy docs for cluster version in VictoriaMetrics 2021-09-02 17:43:35 +03:00
Aliaksandr Valialkin
f529058d3a docs/CHANGELOG.md: cut v1.65.0 2021-09-01 17:12:32 +03:00
Aliaksandr Valialkin
bddd1c35e2 docs/FAQ.md: add questions on how to migrate data from various systems (Prometheus, InfluxDB, OpenTSDB, Graphite) to VictoriaMetrics 2021-09-01 16:47:30 +03:00
Aliaksandr Valialkin
ed818fceef docs: update -help output for victoria-metrics and vmagent after f77dde837a 2021-09-01 16:34:32 +03:00
Aliaksandr Valialkin
ae90225b46 .github/dependabot.yml: increase check intervals for gomod and docker ecosystems from daily to weekly
Daily checks are too verbose and result in too many automatic pull requests and commits
2021-09-01 16:07:00 +03:00
Aliaksandr Valialkin
f77dde837a lib/promscrape: add the ability to limit the number of unique series per each scrape target
The number of series per target can be limited with the following options:

* Global limit with `-promscrape.maxSeriesPerTarget` command-line option.
* Per-target limit with `max_series: N` option in `scrape_config` section.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1561
2021-09-01 16:03:59 +03:00
Roman Khavronenko
867e426070 dashboards: bump vmagent version requirement 2021-09-01 14:20:50 +03:00
dependabot[bot]
c2d17ec655 build(deps): bump github.com/aws/aws-sdk-go from 1.40.33 to 1.40.34 (#1591)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.33 to 1.40.34.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.33...v1.40.34)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-01 12:53:10 +03:00
dependabot[bot]
4bebafd885 build(deps): bump google.golang.org/api from 0.55.0 to 0.56.0 (#1590)
Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.55.0 to 0.56.0.
- [Release notes](https://github.com/googleapis/google-api-go-client/releases)
- [Changelog](https://github.com/googleapis/google-api-go-client/blob/master/CHANGES.md)
- [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.55.0...v0.56.0)

---
updated-dependencies:
- dependency-name: google.golang.org/api
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-01 12:52:17 +03:00
dependabot[bot]
1a6b9157e2 build(deps): bump cloud.google.com/go/storage from 1.16.0 to 1.16.1 (#1589)
Bumps [cloud.google.com/go/storage](https://github.com/googleapis/google-cloud-go) from 1.16.0 to 1.16.1.
- [Release notes](https://github.com/googleapis/google-cloud-go/releases)
- [Changelog](https://github.com/googleapis/google-cloud-go/blob/master/CHANGES.md)
- [Commits](https://github.com/googleapis/google-cloud-go/compare/pubsub/v1.16.0...storage/v1.16.1)

---
updated-dependencies:
- dependency-name: cloud.google.com/go/storage
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-01 12:51:42 +03:00
Aliaksandr Valialkin
111ea89a7d docs: make docs-sync 2021-09-01 12:02:34 +03:00
Aliaksandr Valialkin
9e41b05401 docs/CHANGELOG.md: document eff940aa76 2021-09-01 12:00:02 +03:00
Aliaksandr Valialkin
fce87bfe8d docs/CHANGELOG.md: document 7c70dcbe3b
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1471
2021-09-01 11:56:23 +03:00
Roman Khavronenko
0f4bcc00b2 Single dashboards upd (#1593)
* dashboard: replace `null` datasources

null datasource value may confuse Grafana and make it drop panel query in some
versions.

* docker: bump grafana image version

* dashboards: add URL variable selector to vmagent dashboard

* dashboards: add new panel `Remote write connection saturation` to vmagent dashboard

* alerts: add new alert for `Remote write connection saturation` panel of vmagent dashboard

* dashboards: add "Logging rate" panel to vmagent dashboard
2021-09-01 11:46:22 +03:00
Roman Khavronenko
de26b1d4a2 vmctl: update README and flags description (#1588)
The purpose of update is to make README and flags description more
clear to the reader. Especially, show that vm-account-id flag is required
for clustered version of VM.
2021-09-01 09:31:44 +03:00
Roman Khavronenko
2ed2878a57 docs: fix the link for cluster docker compose 2021-09-01 09:21:45 +03:00
Roman Khavronenko
0d6735106b docs: update docker env description 2021-09-01 09:18:56 +03:00
Roman Khavronenko
cfb6436be5 Vmalert extra params (#1587)
* vmalert: allow extra GET params in datasource package

ExtraParams will be added as GET params to every HTTP request made by datasource.
The `roundDigits` param, for example, was substituted by corresponding extra param.

* vmalert: add nocache=1 param for replay process

The `nocache=1` param is VictoriaMetrics specific parameter which prevents it
from caching and boundaries aligning for queries. We set it to avoid cache
pollution in `replay` mode and also to avoid unnecessary time range boundaries
alignment.

* vmalert: mention nocache=1 in replay description

* vmalert: fix bug with unused param
2021-08-31 14:57:47 +03:00
Nikolay
7c70dcbe3b adds external_labels per group for vmalert (#1485)
* adds external_label per group for vmalert
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1471
2021-08-31 14:52:34 +03:00
Roman Khavronenko
eff940aa76 Vmalert metrics update (#1580)
* vmalert: remove `vmalert_execution_duration_seconds` metric

The summary for `vmalert_execution_duration_seconds` metric gives no additional
value comparing to `vmalert_iteration_duration_seconds` metric.

* vmalert: update config reload success metric properly

Previously, if there was an unsuccessful attempt to reload config and then
rollback to previous version - the metric remained set to 0.

* vmalert: add Grafana dashboard to overview application metrics

* docker: include vmalert target into list for scraping

* vmalert: extend notifier metrics with addr label

The change adds an `addr` label to metrics for alerts_sent and alerts_send_errors
to identify which exact address is having issues.
The according change was made to vmalert dashboard.

* vmalert: update documentation and docker environment for vmalert's dashboard

Mention Grafana's dashboard in vmalert's README in a new section #Monitoring.

Update docker-compose env to automatically add vmalert's dashboard.
Update docker-compose README with additional info about services.
2021-08-31 12:28:02 +03:00
Aliaksandr Valialkin
f41b3d6118 vendor: make vendor-update 2021-08-31 12:03:21 +03:00
Aliaksandr Valialkin
6e085e6dac docs/Single-server-VictoriaMetrics.md: remove outdated link to VictoriaMetrics wiki
VictoriaMetrics wiki became outdated after publishing all the docs at https://docs.victoriametrics.com
2021-08-31 11:48:32 +03:00
Aliaksandr Valialkin
8b228a5873 docs/CHANGELOG.md: add a link to Prometheus staleness tracking 2021-08-31 11:48:32 +03:00
dependabot[bot]
6c388f63b3 build(deps): bump github.com/aws/aws-sdk-go from 1.40.30 to 1.40.33 (#1582)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.30 to 1.40.33.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.30...v1.40.33)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-31 11:10:31 +03:00
dependabot[bot]
525e6ae1b8 build(deps): bump google.golang.org/api from 0.54.0 to 0.55.0 (#1583)
Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.54.0 to 0.55.0.
- [Release notes](https://github.com/googleapis/google-api-go-client/releases)
- [Changelog](https://github.com/googleapis/google-api-go-client/blob/master/CHANGES.md)
- [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.54.0...v0.55.0)

---
updated-dependencies:
- dependency-name: google.golang.org/api
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-31 11:09:55 +03:00
dependabot[bot]
462fa70967 build(deps): bump github.com/klauspost/compress from 1.13.4 to 1.13.5 (#1584)
Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.13.4 to 1.13.5.
- [Release notes](https://github.com/klauspost/compress/releases)
- [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml)
- [Commits](https://github.com/klauspost/compress/compare/v1.13.4...v1.13.5)

---
updated-dependencies:
- dependency-name: github.com/klauspost/compress
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-31 11:01:58 +03:00
Aliaksandr Valialkin
5c63d69454 lib/promscrape/discovery/kubernetes: return back support role: endpointslices, since it is used by VictoriaMetrics operator
This is a follow up commit after 31b42b30b6
2021-08-29 12:37:03 +03:00
Aliaksandr Valialkin
db330232ac lib/protoparser/opentsdb: follow-up after 8ee75ca45a 2021-08-29 11:49:21 +03:00
envzhu
8ee75ca45a lib/protoparser/opentsdb: accept multiple spaces between fields in a row as a delimiter. (#1575) 2021-08-29 11:38:32 +03:00
Aliaksandr Valialkin
31b42b30b6 lib/promscrape/discovery/kubernetes: rename role: endpointslices to role: endpointslice to be consistent with Prometheus
See 2ec6c7dbb8/discovery/kubernetes/kubernetes.go (L99)
2021-08-29 11:23:08 +03:00
Aliaksandr Valialkin
2e001db4de lib/promscrape/discovery/kubernetes: use v1 API instead of v1beta1 API for role: ingress and role: endpointslices
This should fix service discovery for these roles in Kubernetes v1.22 and newer versions.
See https://kubernetes.io/docs/reference/using-api/deprecation-guide/#ingress-v122

The corresponding change in Prometheus - https://github.com/prometheus/prometheus/pull/9205
2021-08-29 11:16:59 +03:00
Aliaksandr Valialkin
189507d9d0 docs/Single-server-VictoriaMetrics.md: mention that downsampling doesn't improve query performance on high churn rate 2021-08-27 18:50:26 +03:00
Aliaksandr Valialkin
5ea689d61b app/vmselect/promql: add quantile("phiLabel", phi1, ..., phiN, q) aggregate function to MetricsQL
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1573
2021-08-27 18:37:20 +03:00
Aliaksandr Valialkin
bec18e4fe9 app/vmselect: add -search.disableAutoCacheReset command-line option for disabling automatic cache reset when a sample with old timestamp outside -search.cacheTimestampOffset is inserted
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1570
2021-08-27 17:15:31 +03:00
Aliaksandr Valialkin
67189be1cb docs/{vmgateway,vmbackupmanager}: mention that enterprise binaries are free for download and evaluation 2021-08-27 14:54:09 +03:00
Aliaksandr Valialkin
321da535fa docs: link to active time series, churn rate and high cardinality questions 2021-08-27 14:44:53 +03:00
Aliaksandr Valialkin
c8c153fb91 docs/CHANGELOG.md: document the bugfix for possible timeout error in vmbackupmanager when making snapshots
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1571
2021-08-27 13:04:06 +03:00
Aliaksandr Valialkin
2bc79042f6 docs: mention that enterprise binaries can be downloaded and evaluated for free 2021-08-27 12:48:14 +03:00
Aliaksandr Valialkin
4b3877b798 vendor: make vendor-update 2021-08-26 09:42:23 +03:00
Aliaksandr Valialkin
2bef940add docs/vmagent.md: document the ability to load scrape configs from multiple files
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1559
2021-08-26 09:13:14 +03:00
Aliaksandr Valialkin
10f960fa0c lib/promscrape: add ability to load scrape configs from multiple files
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1559
2021-08-26 08:51:16 +03:00
Aliaksandr Valialkin
25ee4a3644 vendor: make vendor-update 2021-08-25 13:41:02 +03:00
dependabot[bot]
66626db92f build(deps): bump codecov/codecov-action from 2.0.2 to 2.0.3 (#1563)
Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 2.0.2 to 2.0.3.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/codecov/codecov-action/compare/v2.0.2...v2.0.3)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-25 13:40:21 +03:00
Aliaksandr Valialkin
e24203cde8 docs/CHANGELOG.md: document 48f33d098b 2021-08-25 13:31:35 +03:00
benclive
48f33d098b Remove trailing slash for URLPrefixes with specific path (#1554) 2021-08-25 13:28:50 +03:00
Aliaksandr Valialkin
9fc9d76a7f docs/Cluster-VictoriaMetrics.md: mention that the -replicationFactor at vmselect is an optional parameter 2021-08-25 13:10:57 +03:00
Aliaksandr Valialkin
c27ee35c5c lib/promscrape: expose promscrape_discovery_http_errors_total metric for tracking errors per each http_sd config 2021-08-25 13:05:49 +03:00
Aliaksandr Valialkin
ffc0ab1774 lib/{mergeset,storage}: improve the detection of the needed free space for background merge
This should prevent from possible out of disk space crashes during big merges.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1560
2021-08-25 09:35:44 +03:00
Aliaksandr Valialkin
a287a48634 docs/FAQ.md: add more entries for frequently asked questions
The following topics are covered:

* Active time series
* High cardinality
* High churn rate
* Slow inserts
2021-08-24 11:34:44 +03:00
Aliaksandr Valialkin
8358890e33 docs/MetricsQL.md: typo fix: histogram_qunatile -> histogram_quantile 2021-08-23 23:08:16 +03:00
Aliaksandr Valialkin
6c5760db9c app/vmselect/promql: make fmt after 0078486ea7 2021-08-23 23:06:00 +03:00
Aliaksandr Valialkin
7c57745f40 docs/MetricsQL.md: fix the indentation for median function 2021-08-23 12:04:31 +03:00
Aliaksandr Valialkin
a78672f95a docs/MetricsQL.md: typo fix: convesions->conversions 2021-08-23 12:02:03 +03:00
Aliaksandr Valialkin
17a1241022 docs/MetricsQL.md: typo fixes 2021-08-23 11:59:12 +03:00
Aliaksandr Valialkin
60ac3e1e46 docs/MetricsQL.md: rehaul the documentation on MetricsQL
* Document all the functions supported by MetricsQL, including PromQL functions
* Group functions by their type: rollup functions, transform functions, label manipulation functions and aggregate functions.
* Document implicit query transformations.
2021-08-23 11:45:52 +03:00
Aliaksandr Valialkin
0078486ea7 app/vmselect/promql: rename sign() function to sgn() in order to be consistent with Prometheus
See https://github.com/prometheus/prometheus/pull/8457 for details.
2021-08-23 11:45:51 +03:00
Aliaksandr Valialkin
69c291353b deployment/docker: update Go builder from Go1.16.0 to Go1.17.0
This improves data ingestion and query performance by up to 5% according to benchmarks.

See https://go.dev/blog/go1.17
2021-08-21 22:20:49 +03:00
Aliaksandr Valialkin
d5622b32e2 lib/promscrape: reduce memory and CPU usage when Prometheus staleness tracking is enabled for metrics from deleted / disappeared scrape targets
Store the scraped response body instead of storing the parsed and relabeled metrics.
This should reduce memory usage, since the response body takes less memory than the parsed and relabeled metrics.
This is especially true for Kubernetes service discovery, which adds many long labels for all the scraped metrics.

This should also reduce CPU usage, since the marshaling of the parsed
and relabeled metrics has been substituted by response body copying.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1526
2021-08-21 21:17:26 +03:00
Aliaksandr Valialkin
2288e75f03 docs/vmalert.md: run make docs-sync after 9ee3d0378f 2021-08-21 20:24:56 +03:00
Aliaksandr Valialkin
6ffbb46aef docs/CHANGELOG.md: document 9ee3d0378f 2021-08-21 20:20:08 +03:00
Aliaksandr Valialkin
89a4e8fd9b vendor: make vendor-update 2021-08-21 20:16:19 +03:00
Roman Khavronenko
9ee3d0378f vmalert: add flag disableAlertgroupLabel for disabling extra label added to series (#1534)
The new label added in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/611
may negatively impact deduplication in Alertmanager. The new flag is supposed to give
an option to disable adding this label.

To enable flag just add `-disableAlertgroupLabel` to binary execution command.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1532
2021-08-21 20:08:55 +03:00
Aliaksandr Valialkin
4f3a5742eb app/vmselect/prometheus: do not extend [d] to the detected interval between samples for first_over_time(m[d])
This is for the sake of consistency with similar change for the last_over_time(m[d]) at a724229b5d
2021-08-21 19:56:14 +03:00
dependabot[bot]
41fdfdb895 build(deps): bump github.com/aws/aws-sdk-go from 1.40.25 to 1.40.26 (#1551)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.25 to 1.40.26.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.25...v1.40.26)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-21 19:52:46 +03:00
Alexander Rickardsson
f4cecaf296 vmalert: accept http.StatusOK for remotewrite (#1550) 2021-08-20 11:58:32 +03:00
Aliaksandr Valialkin
f46a73dcdd lib/promscrape: use scrapeTimestamp when storing stale markers for failed scrape
This will make timestamps for stale markers more consistent with timestamps for other samples
2021-08-19 14:18:05 +03:00
Aliaksandr Valialkin
c14edc860b docs/CHANGELOG.md: document b5d6a0e499 2021-08-19 14:03:20 +03:00
Roman Khavronenko
b5d6a0e499 vmselect: update vm_request_duration_seconds value when request fails (#1537)
Before, metric `vm_request_duration_seconds` was updated only on successful
attempts which could be misleading. For example, timeout errors on netstorage
request may be not accounted in the metric and won't be visible on dashboards.
Using `defer` statement to update the metric after query arguments validation
may improve the situation.
2021-08-19 13:58:54 +03:00
dependabot[bot]
cbab5f3b42 build(deps): bump github.com/aws/aws-sdk-go from 1.40.22 to 1.40.25 (#1548)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.22 to 1.40.25.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.22...v1.40.25)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-19 13:56:42 +03:00
Aliaksandr Valialkin
80ddade4ed docs/CHANGELOG.md: clarify the change, which adds -search.noStaleMarkers command-line flag 2021-08-19 13:56:04 +03:00
Aliaksandr Valialkin
a724229b5d app/vmselect/promql: do not override [d] at last_over_time(m[d]) if [d] is smaller than scrape_interval
Since most users do not expect the overriding of explicitly set `[d]`.
2021-08-19 10:31:48 +03:00
Aliaksandr Valialkin
ce0c270e75 docs/CHANGELOG.md: cut v1.64.1
This is mostly a bugfix release, which includes fixes for staleness handling and a security update for Alpine base image
2021-08-18 22:06:05 +03:00
Aliaksandr Valialkin
c09446a9aa lib/promscrape: send stale markers for the previously scraped metrics on failed scrapes like Prometheus does 2021-08-18 21:59:03 +03:00
Aliaksandr Valialkin
f6e6056c17 vendor: update github.com/valyala/gozstd from v1.11.0 to v1.12.0
This should improve query scalability on systems with big number of CPU cores
2021-08-18 14:57:19 +03:00
Aliaksandr Valialkin
04c3e9916d docs/CHANGELOG.md: document 06bf21c21b 2021-08-18 14:01:04 +03:00
Aliaksandr Valialkin
cdc372bb98 app/vmselect: add -search.noStaleMarkers command-line flag for disabling stale markers handling in queries
This option allows reducing CPU usage a bit when VictoriaMetrics is used
for collecting and processing non-Prometheus data. For example, InfluxDB line protocol, Graphite, OpenTSDB, CSV, etc.
2021-08-18 13:59:02 +03:00
Aliaksandr Valialkin
226143f31b lib/promscrape: add ability to disable sending Prometheus staleness markers with -promscrape.disableStaleMarkers command-line flag
This option can be useful when vmagent consumes too much additional memory
for staleness markers functionality and when staleness markers aren't needed.
2021-08-18 13:43:21 +03:00
Aliaksandr Valialkin
06bf21c21b deployment/docker: upgrade Alpine base docker image from v3.14.0 to v3.14.1
See https://www.alpinelinux.org/posts/Alpine-3.14.1-released.html

This fixes https://vuldb.com/?source_cve.180051
See also https://vuldb.com/?id.180051 and https://snyk.io/vuln/SNYK-ALPINE314-APKTOOLS-1533752
2021-08-18 11:04:11 +03:00
Aliaksandr Valialkin
db1e62495b app/vmselect/promql: add bitmap_and(), bitmap_or() and bitmap_xor() functions to MetricsQL
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1541
2021-08-17 13:21:21 +03:00
Aliaksandr Valialkin
277538d655 docs/Single-server-VictoriaMetrics.md: mention that vmctl can migrate data from OpenTSDB to VictoriaMetrics 2021-08-17 11:12:16 +03:00
Aliaksandr Valialkin
bd14b0887e app/vmselect/promql: move common condition to dropStaleNaNs in order to improve code maintainability 2021-08-17 11:01:16 +03:00
Aliaksandr Valialkin
03c959f1df lib/promscrape: stop scrapers for the removed targets before starting scrapers for the added targets
This should prevent from possible time series overlap when old target is substituted by new target (for example, during Kubernetes deployments).

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1526
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1530
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1509
2021-08-17 00:55:51 +03:00
Aliaksandr Valialkin
90434ba25b app/vmalert: mention -remoteWrite.disablePathAppend in the description for -remoteWrite.url 2021-08-16 15:22:47 +03:00
Aliaksandr Valialkin
f37b963619 app/vmalert: follow-up for 2400f85761 2021-08-16 15:20:22 +03:00
Aliaksandr Valialkin
4547d4f692 docs/CHANGELOG.md: update urls to Prometheus 2.29 release
Previously these urls were pointing to rc0 release
2021-08-16 14:53:38 +03:00
Aliaksandr Valialkin
ae9f923449 docs/CHANGELOG.md: typo fix: satureated -> saturated 2021-08-16 14:53:38 +03:00
Alexander Rickardsson
2400f85761 vmalert: enable configuring explicit path (#1536)
* vmalert: allow to disable automatically added path to remote write address via disablePathAppend flag
* docs: update docs to include remoteWrite.disablePathAppend
2021-08-16 14:20:57 +03:00
dependabot[bot]
9af8c71975 build(deps): bump github.com/aws/aws-sdk-go from 1.40.21 to 1.40.22 (#1539)
Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.40.21 to 1.40.22.
- [Release notes](https://github.com/aws/aws-sdk-go/releases)
- [Changelog](https://github.com/aws/aws-sdk-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/aws/aws-sdk-go/compare/v1.40.21...v1.40.22)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-16 11:31:22 +03:00
dependabot[bot]
8297ad8f03 build(deps): bump google.golang.org/api from 0.53.0 to 0.54.0 (#1538)
Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.53.0 to 0.54.0.
- [Release notes](https://github.com/googleapis/google-api-go-client/releases)
- [Changelog](https://github.com/googleapis/google-api-go-client/blob/master/CHANGES.md)
- [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.53.0...v0.54.0)

---
updated-dependencies:
- dependency-name: google.golang.org/api
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-08-16 11:30:08 +03:00
1346 changed files with 369982 additions and 58183 deletions

View File

@@ -7,7 +7,7 @@ updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "daily"
interval: "weekly"
- package-ecosystem: "bundler"
directory: "/docs"
schedule:
@@ -15,12 +15,12 @@ updates:
- package-ecosystem: "gomod"
directory: "/app/vmui/packages/vmui/web"
schedule:
interval: "daily"
interval: "weekly"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "npm"
directory: "/app/vmui"
directory: "/app/vmui/packages/vmui"
schedule:
interval: "daily"
interval: "weekly"

View File

@@ -18,13 +18,13 @@ jobs:
with:
go-version: 1.16
id: go
- name: Dependencies
run: |
go get -u golang.org/x/lint/golint
go get -u github.com/kisielk/errcheck
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.29.0
- name: Code checkout
uses: actions/checkout@master
- name: Dependencies
run: |
make install-golint
make install-errcheck
make install-golangci-lint
- name: Build
env:
GO111MODULE: on
@@ -60,7 +60,7 @@ jobs:
GOOS=darwin go build -mod=vendor ./app/vmctl
CGO_ENABLED=0 GOOS=windows go build -mod=vendor ./app/vmagent
- name: Publish coverage
uses: codecov/codecov-action@v2.0.2
uses: codecov/codecov-action@v2.1.0
with:
file: ./coverage.txt

View File

@@ -88,6 +88,12 @@ release-snap:
snapcraft
snapcraft upload "victoriametrics_$(PKG_TAG)_multi.snap" --release beta,edge,candidate
publish-release:
git checkout $(TAG) && $(MAKE) release publish && \
git checkout $(TAG)-cluster && $(MAKE) release publish && \
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish
release: \
release-victoria-metrics \
release-vmutils
@@ -261,7 +267,7 @@ golangci-lint: install-golangci-lint
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
install-golangci-lint:
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.40.1
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.43.0
install-wwhrd:
which wwhrd || GO111MODULE=off go get github.com/frapposelli/wwhrd

431
README.md
View File

@@ -12,24 +12,23 @@
VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database.
It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), [Snap package](https://snapcraft.io/victoriametrics)
and in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).
If you use Ubuntu, then just run `snap install victoriametrics` in order to install and run it.
VictoriaMetrics is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
in [Docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), in [Snap packages](https://snapcraft.io/victoriametrics)
and in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and follow [these instructions](#how-to-start-victoriametrics).
Then read [Prometheus setup](#prometheus-setup) and [Grafana setup](#grafana-setup) docs.
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
Cluster version of VictoriaMetrics is available [here](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
See additional docs at our [Wiki](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki).
[Contact us](mailto:info@victoriametrics.com) if you need paid enterprise support for VictoriaMetrics.
See [features available for enterprise customers](https://victoriametrics.com/enterprise.html).
[Contact us](mailto:info@victoriametrics.com) if you need enterprise support for VictoriaMetrics.
See [features available in enterprise package](https://victoriametrics.com/enterprise.html).
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
## Case studies and talks
Case studies:
* [AbiosGaming](https://docs.victoriametrics.com/CaseStudies.html#abiosgaming)
* [adidas](https://docs.victoriametrics.com/CaseStudies.html#adidas)
* [Adsterra](https://docs.victoriametrics.com/CaseStudies.html#adsterra)
* [ARNES](https://docs.victoriametrics.com/CaseStudies.html#arnes)
@@ -37,11 +36,16 @@ Case studies:
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
* [Dreamteam](https://docs.victoriametrics.com/CaseStudies.html#dreamteam)
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
* [Grammarly](https://docs.victoriametrics.com/CaseStudies.html#grammarly)
* [Groove X](https://docs.victoriametrics.com/CaseStudies.html#groove-x)
* [Idealo.de](https://docs.victoriametrics.com/CaseStudies.html#idealode)
* [MHI Vestas Offshore Wind](https://docs.victoriametrics.com/CaseStudies.html#mhi-vestas-offshore-wind)
* [Razorpay](https://docs.victoriametrics.com/CaseStudies.html#razorpay)
* [Percona](https://docs.victoriametrics.com/CaseStudies.html#percona)
* [Sensedia](https://docs.victoriametrics.com/CaseStudies.html#sensedia)
* [Smarkets](https://docs.victoriametrics.com/CaseStudies.html#smarkets)
* [Synthesio](https://docs.victoriametrics.com/CaseStudies.html#synthesio)
* [Wedos.com](https://docs.victoriametrics.com/CaseStudies.html#wedoscom)
* [Wix.com](https://docs.victoriametrics.com/CaseStudies.html#wixcom)
@@ -53,188 +57,101 @@ See also [articles and slides about VictoriaMetrics from our users](https://docs
## Prominent features
* VictoriaMetrics can be used as long-term storage for Prometheus or for [vmagent](https://docs.victoriametrics.com/vmagent.html).
See [these docs](#prometheus-setup) for details.
* VictoriaMetrics supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
* VictoriaMetrics implements [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) query language backwards compatible with PromQL.
* VictoriaMetrics provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics.
Later this data may be queried via a single query.
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
[Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
when dealing with millions of unique time series (aka high cardinality).
* Optimized for time series with high churn rate. Think about [prometheus-operator](https://github.com/coreos/prometheus-operator) metrics from frequent deployments in Kubernetes.
* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
may be crammed into limited storage compared to TimescaleDB
and [up to 7x less storage space is required comparing to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
* Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* Easy operation:
VictoriaMetrics has the following prominent features:
* It can be used as long-term storage for Prometheus. See [these docs](#prometheus-setup) for details.
* It can be used as drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
* It can be used as drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
* It features easy setup and operation:
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
* All the configuration is done via explicit command-line flags with reasonable defaults.
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
to S3 or GCS with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html).
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
* [Metrics from Prometheus exporters](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
such as [node_exporter](https://github.com/prometheus/node_exporter). See [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for details.
* [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) over HTTP, TCP and UDP.
* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
if `-graphiteListenAddr` is set.
* [OpenTSDB put message](#sending-data-via-telnet-put-protocol) if `-opentsdbListenAddr` is set.
* [HTTP OpenTSDB /api/put requests](#sending-opentsdb-data-via-http-apiput-requests) if `-opentsdbHTTPListenAddr` is set.
* [JSON line format](#how-to-import-data-in-json-line-format).
* [Native binary format](#how-to-import-data-in-native-format).
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) to S3 or GCS can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools. See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* It implements PromQL-based query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
* It provides high performance and good vertical and horizontal scalability for both [data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f) when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
* It provides high data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) may be crammed into limited storage compared to TimescaleDB and [up to 7x less storage space is required compared to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae), [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
* [Prometheus remote write API](#prometheus-setup).
* [Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format).
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) over HTTP, TCP and UDP.
* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon).
* [OpenTSDB put message](#sending-data-via-telnet-put-protocol).
* [HTTP OpenTSDB /api/put requests](#sending-opentsdb-data-via-http-apiput-requests).
* [JSON line format](#how-to-import-data-in-json-line-format).
* [Arbitrary CSV data](#how-to-import-csv-data).
* Supports metrics' relabeling. See [these docs](#relabeling) for details.
* Can deal with high cardinality and high churn rate issues using [series limiter](#cardinality-limiter).
* Ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
* See also technical [Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
* [Native binary format](#how-to-import-data-in-native-format).
* It supports metrics' relabeling. See [these docs](#relabeling) for details.
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various [Enterprise workloads](https://victoriametrics.com/enterprise.html).
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
See also [various Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
## Operation
### Table of contents
* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
* [Environment variables](#environment-variables)
* [Configuration with snap package](#configuration-with-snap-package)
* [Prometheus setup](#prometheus-setup)
* [Grafana setup](#grafana-setup)
* [How to upgrade VictoriaMetrics](#how-to-upgrade-victoriametrics)
* [How to apply new config to VictoriaMetrics](#how-to-apply-new-config-to-victoriametrics)
* [How to scrape Prometheus exporters such as node_exporter](#how-to-scrape-prometheus-exporters-such-as-node-exporter)
* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
* [Querying Graphite data](#querying-graphite-data)
* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
* [Prometheus querying API usage](#prometheus-querying-api-usage)
* [Prometheus querying API enhancements](#prometheus-querying-api-enhancements)
* [Graphite API usage](#graphite-api-usage)
* [Graphite Render API usage](#graphite-render-api-usage)
* [Graphite Metrics API usage](#graphite-metrics-api-usage)
* [Graphite Tags API usage](#graphite-tags-api-usage)
* [How to build from sources](#how-to-build-from-sources)
* [Development build](#development-build)
* [Production build](#production-build)
* [ARM build](#arm-build)
* [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0)
* [Building docker images](#building-docker-images)
* [Start with docker-compose](#start-with-docker-compose)
* [Setting up service](#setting-up-service)
* [How to work with snapshots](#how-to-work-with-snapshots)
* [How to delete time series](#how-to-delete-time-series)
* [Forced merge](#forced-merge)
* [How to export time series](#how-to-export-time-series)
* [How to export data in native format](#how-to-export-data-in-native-format)
* [How to export data in JSON line format](#how-to-export-data-in-json-line-format)
* [How to export CSV data](#how-to-export-csv-data)
* [How to import time series data](#how-to-import-time-series-data)
* [How to import data in native format](#how-to-import-data-in-native-format)
* [How to import data in json line format](#how-to-import-data-in-json-line-format)
* [How to import CSV data](#how-to-import-csv-data)
* [How to import data in Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format)
* [Relabeling](#relabeling)
* [Federation](#federation)
* [Capacity planning](#capacity-planning)
* [High availability](#high-availability)
* [Deduplication](#deduplication)
* [Retention](#retention)
* [Multiple retentions](#multiple-retentions)
* [Downsampling](#downsampling)
* [Multi-tenancy](#multi-tenancy)
* [Scalability and cluster version](#scalability-and-cluster-version)
* [Alerting](#alerting)
* [Security](#security)
* [Tuning](#tuning)
* [Monitoring](#monitoring)
* [TSDB stats](#tsdb-stats)
* [Cardinality limiter](#cardinality-limiter)
* [Troubleshooting](#troubleshooting)
* [Data migration](#data-migration)
* [Backfilling](#backfilling)
* [Data updates](#data-updates)
* [Replication](#replication)
* [Backups](#backups)
* [Profiling](#profiling)
* [Integrations](#integrations)
* [Third-party contributions](#third-party-contributions)
* [Contacts](#contacts)
* [Community and contributions](#community-and-contributions)
* [Reporting bugs](#reporting-bugs)
* [VictoriaMetrics Logo](#victoria-metrics-logo)
* [Logo Usage Guidelines](#logo-usage-guidelines)
* [Font used](#font-used)
* [Color Palette](#color-palette)
* [We kindly ask](#we-kindly-ask)
* [List of command-line flags](#list-of-command-line-flags)
## How to start VictoriaMetrics
Start VictoriaMetrics [executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
or [docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) with the desired command-line flags.
Just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
The following command-line flags are used the most:
* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
* `-storageDataPath` - VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
* `-retentionPeriod` - retention for stored data. Older data is automatically deleted. Default retention is 1 month. See [these docs](#retention) for more details.
Other flags have good enough default values, so set them only if you really need this. Pass `-help` to see [all the available flags with description and default values](#list-of-command-line-flags).
See how to [ingest data to VictoriaMetrics](#how-to-import-time-series-data), how to [query VictoriaMetrics](#grafana-setup)
and how to [handle alerts](#alerting).
See how to [ingest data to VictoriaMetrics](#how-to-import-time-series-data), how to [query VictoriaMetrics via Grafana](#grafana-setup), how to [query VictoriaMetrics via Graphite API](#graphite-api-usage) and how to [handle alerts](#alerting).
VictoriaMetrics accepts [Prometheus querying API requests](#prometheus-querying-api-usage) on port `8428` by default.
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
### Environment variables
Each flag value can be set via environment variables according to these rules:
* The `-envflag.enable` flag must be set
* Each `.` char in flag name must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
* For repeating flags an alternative syntax can be used by joining the different values into one using `,` char as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
* It is possible setting prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`
* The `-envflag.enable` flag must be set.
* Each `.` char in flag name must be substituted with `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`).
* For repeating flags an alternative syntax can be used by joining the different values into one using `,` char as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`).
* Environment var prefix can be set via `-envflag.prefix` flag. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`.
### Configuration with snap package
Command-line flags can be changed with the following command:
Snap package for VictoriaMetrics is available [here](https://snapcraft.io/victoriametrics).
Command-line flags for Snap package can be set with the following command:
```text
echo 'FLAGS="-selfScrapeInterval=10s -search.logSlowQueryDuration=20s"' > $SNAP_DATA/var/snap/victoriametrics/current/extra_flags
snap restart victoriametrics
```
Or add needed command-line flags to the file `$SNAP_DATA/var/snap/victoriametrics/current/extra_flags`.
Note you cannot change value for `-storageDataPath` flag, for safety snap package has limited access to host system.
Do not change value for `-storageDataPath` flag, because snap package has limited access to host filesystem.
Changing scrape configuration is possible with text editor:
```text
vi $SNAP_DATA/var/snap/victoriametrics/current/etc/victoriametrics-scrape-config.yaml
```
After changes were made, trigger config re-read with command `curl 127.0.0.1:8428/-/reload`.
Changing scrape configuration is possible with text editor:
```text
vi $SNAP_DATA/var/snap/victoriametrics/current/etc/victoriametrics-scrape-config.yaml
```
After changes were made, trigger config re-read with the command `curl 127.0.0.1:8428/-/reload`.
## Prometheus setup
Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
in order to send data to VictoriaMetrics. Add the following lines
to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`) in order to send data to VictoriaMetrics:
```yml
remote_write:
@@ -252,7 +169,7 @@ Prometheus writes incoming data to local storage and replicates it to remote sto
This means that data remains available in local storage for `--storage.tsdb.retention.time` duration
even if remote storage is unavailable.
If you plan to send data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into `global` section
If you plan to send data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into `global` section
of [Prometheus config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file):
```yml
@@ -261,11 +178,11 @@ global:
datacenter: dc-123
```
This instructs Prometheus to add `datacenter=dc-123` label to each time series sent to remote storage.
This instructs Prometheus to add `datacenter=dc-123` label to each sample before sending it to remote storage.
The label name can be arbitrary - `datacenter` is just an example. The label value must be unique
across Prometheus instances, so those time series may be filtered and grouped by this label.
across Prometheus instances, so time series could be filtered and grouped by this label.
For highly loaded Prometheus instances (400k+ samples per second) the following tuning may be applied:
For highly loaded Prometheus instances (200k+ samples per second) the following tuning may be applied:
```yaml
remote_write:
@@ -276,14 +193,13 @@ remote_write:
max_shards: 30
```
Using remote write increases memory usage for Prometheus up to ~25% and depends on the shape of data. If you are experiencing issues with
too high memory consumption try to lower `max_samples_per_send` and `capacity` params (keep in mind that these two params are tightly connected).
Using remote write increases memory usage for Prometheus by up to ~25%. If you are experiencing issues with
too high memory consumption of Prometheus, then try to lower `max_samples_per_send` and `capacity` params. Keep in mind that these two params are tightly connected.
Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer, since previous versions may have issues with `remote_write`.
Take a look also at [vmagent](https://docs.victoriametrics.com/vmagent.html)
and [vmalert](https://docs.victoriametrics.com/vmalert.html),
Take a look also at [vmagent](https://docs.victoriametrics.com/vmagent.html) and [vmalert](https://docs.victoriametrics.com/vmalert.html),
which can be used as faster and less resource-hungry alternative to Prometheus.
@@ -297,27 +213,22 @@ http://<victoriametrics-addr>:8428
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then build graphs with the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html). VictoriaMetrics supports [Prometheus querying API](#prometheus-querying-api-usage),
which is used by Grafana.
Then build graphs and dashboards for the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
## How to upgrade VictoriaMetrics
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
say otherwise. It is safe skipping multiple versions during the upgrade unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
It is recommended performing regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise. It is safe skipping multiple versions during the upgrade unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise. It is recommended performing regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
It is also safe downgrading to the previous version unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
It is also safe downgrading to older versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
The following steps must be performed during the upgrade / downgrade:
The following steps must be performed during the upgrade / downgrade procedure:
* Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it.
* Wait until the process stops. This can take a few seconds.
* Start the upgraded VictoriaMetrics.
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/vmagent.html).
## How to apply new config to VictoriaMetrics
@@ -328,15 +239,12 @@ VictoriaMetrics is configured via command-line flags, so it must be restarted wh
* Wait until the process stops. This can take a few seconds.
* Start VictoriaMetrics with the new command-line flags.
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/vmagent.html).
## How to scrape Prometheus exporters such as [node-exporter](https://github.com/prometheus/node_exporter)
VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping targets configured in `prometheus.yml` config file according to [the specification](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file).
Just set `-promscrape.config` command-line flag to the path to `prometheus.yml` config - and VictoriaMetrics should start scraping the configured targets.
Currently the following [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) types are supported:
VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping targets configured in `prometheus.yml` config file according to [the specification](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file). Just set `-promscrape.config` command-line flag to the path to `prometheus.yml` config - and VictoriaMetrics should start scraping the configured targets. Currently the following [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) types are supported:
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
@@ -353,7 +261,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
* [http_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config)
Other `*_sd_config` types will be supported in the future.
File a [feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need support for other `*_sd_config` types.
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
@@ -362,6 +270,52 @@ VictoriaMetrics also supports [importing data in Prometheus exposition format](#
See also [vmagent](https://docs.victoriametrics.com/vmagent.html), which can be used as drop-in replacement for Prometheus.
## How to send data from DataDog agent
VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/) or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/) via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics) at `/datadog/api/v1/series` path.
Run DataDog agent with `DD_DD_URL=http://victoriametrics-host:8428/datadog` environment variable in order to write data to VictoriaMetrics at `victoriametrics-host` host. Another option is to set `dd_url` param at [DataDog agent configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) to `http://victoriametrics-host:8428/datadog`.
Example on how to send data to VictoriaMetrics via DataDog "submit metrics" API from command line:
```bash
echo '
{
"series": [
{
"host": "test.example.com",
"interval": 20,
"metric": "system.load.1",
"points": [[
0,
0.5
]],
"tags": [
"environment:test"
],
"type": "rate"
}
]
}
' | curl -X POST --data-binary @- http://localhost:8428/datadog/api/v1/series
```
The imported data can be read via [export API](https://docs.victoriametrics.com/#how-to-export-data-in-json-line-format):
```bash
curl http://localhost:8428/api/v1/export -d 'match[]=system.load.1'
```
This command should return the following output if everything is OK:
```
{"metric":{"__name__":"system.load.1","environment":"test","host":"test.example.com"},"values":[0.5],"timestamps":[1632833641000]}
```
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
For example, `/datadog/api/v1/series?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
## How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)
Use `http://<victoriametrics-addr>:8428` url instead of InfluxDB url in agents' configs.
@@ -372,21 +326,18 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
urls = ["http://<victoriametrics-addr>:8428"]
```
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag
and stream plain Influx line protocol data to the configured TCP and/or UDP addresses.
Another option is to enable TCP and UDP receiver for InfluxDB line protocol via `-influxListenAddr` command-line flag
and stream plain InfluxDB line protocol data to the configured TCP and/or UDP addresses.
VictoriaMetrics maps Influx data using the following rules:
VictoriaMetrics performs the following transformations to the ingested InfluxDB data:
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
unless `db` tag exists in the Influx line.
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag.
See also `-influxSkipSingleField` command-line flag.
If `{measurement}` is empty or `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names.
unless `db` tag exists in the InfluxDB line.
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value, where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag. See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty or if `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names.
* Field values are mapped to time series values.
* Tags are mapped to Prometheus labels as-is.
For example, the following Influx line:
For example, the following InfluxDB line:
```raw
foo,tag1=value1,tag2=value2 field1=12,field2=40
@@ -399,7 +350,7 @@ foo_field1{tag1="value1", tag2="value2"} 12
foo_field2{tag1="value1", tag2="value2"} 40
```
Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
Example for writing data with [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
to local VictoriaMetrics using `curl`:
```bash
@@ -420,7 +371,7 @@ The `/api/v1/export` endpoint should return the following response:
{"metric":{"__name__":"measurement_field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]}
```
Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
Note that InfluxDB line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
while VictoriaMetrics stores them with *milliseconds* precision.
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
@@ -467,7 +418,7 @@ The `/api/v1/export` endpoint should return the following response:
Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read via the following APIs:
* [Graphite API](#graphite-api-usage)
* [Prometheus querying API](#prometheus-querying-api-usage). VictoriaMetrics supports `__graphite__` pseudo-label for selecting time series with Graphite-compatible filters in [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html). For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster and it is easier to use when migrating from Graphite to VictoriaMetrics.
* [Prometheus querying API](#prometheus-querying-api-usage). VictoriaMetrics supports `__graphite__` pseudo-label for selecting time series with Graphite-compatible filters in [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html). For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster and it is easier to use when migrating from Graphite to VictoriaMetrics. VictoriaMetrics also supports [label_graphite_group](https://docs.victoriametrics.com/MetricsQL.html#label_graphite_group) function for extracting the given groups from Graphite metric name.
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/main/cmd/carbonapi/carbonapi.example.victoriametrics.yaml)
## How to send data from OpenTSDB-compatible agents
@@ -579,7 +530,7 @@ By default, VictoriaMetrics returns time series for the last 5 minutes from `/ap
Additionally VictoriaMetrics provides the following handlers:
* `/vmui` - Basic Web UI
* `/vmui` - Basic Web UI. See [these docs](#vmui).
* `/api/v1/series/count` - returns the total number of time series in the database. Some notes:
* the handler scans all the inverted index, so it can be slow if the database contains tens of millions of time series;
* the handler may count [deleted time series](#how-to-delete-time-series) additionally to normal time series due to internal implementation restrictions;
@@ -610,15 +561,15 @@ visible to the given tenant. It is expected that the `extra_label` query arg is
[Contact us](mailto:sales@victoriametrics.com) if you need assistance with such a proxy.
VictoriaMetrics supports `__graphite__` pseudo-label for filtering time series with Graphite-compatible filters in [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster
and it is easier to use when migrating from Graphite to VictoriaMetrics.
For example, `{__graphite__="foo.*.bar"}` is equivalent to `{__name__=~"foo[.][^.]*[.]bar"}`, but it works faster and it is easier to use when migrating from Graphite to VictoriaMetrics. See also [label_graphite_group](https://docs.victoriametrics.com/MetricsQL.html#label_graphite_group) function.
### Graphite Render API usage
[VictoriaMetrics Enterprise](https://victoriametrics.com/enterprise.html) supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
at `/render` endpoint, which is used by [Graphite datasource in Grafana](https://grafana.com/docs/grafana/latest/datasources/graphite/).
It supports `Storage-Step` http request header, which must be set to a step between data points stored in VictoriaMetrics when configuring Graphite datasource in Grafana.
When configuring Graphite datasource in Grafana, the `Storage-Step` http request header must be set to a step between Graphite data points stored in VictoriaMetrics. For example, `Storage-Step: 10s` would mean 10 seconds distance between Graphite datapoints stored in VictoriaMetrics.
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
### Graphite Metrics API usage
@@ -649,6 +600,21 @@ VictoriaMetrics supports the following handlers from [Graphite Tags API](https:/
* [/tags/delSeries](https://graphite.readthedocs.io/en/stable/tags.html#removing-series-from-the-tagdb)
## vmui
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`.
The UI allows exploring query results via graphs and tables. Graphs support scrolling and zooming:
* Drag the graph to the left / right in order to move the displayed time range into the past / future.
* Hold `Ctrl` (or `Cmd` on MacOS) and scroll up / down in order to zoom in / out the graph.
Query history can be navigated by holding `Ctrl` (or `Cmd` on MacOS) and pressing `up` or `down` arrows on the keyboard while the cursor is located in the query input field.
When querying the [backfilled data](https://docs.victoriametrics.com/#backfilling), it may be useful to disable the response cache by clicking the `Enable cache` checkbox.
See the [example VMUI at VictoriaMetrics playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/?g0.expr=100%20*%20sum(rate(process_cpu_seconds_total))%20by%20(job)&g0.range_input=1d).
## How to build from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -758,7 +724,7 @@ Note that background merges may never occur for data from previous months, so st
In this case [forced merge](#forced-merge) may help freeing up storage space.
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics. By default this query will only scan active series in the past 5 minutes, so you may need to
before actually deleting the metrics. By default this query will only scan series in the past 5 minutes, so you may need to
adjust `start` and `end` to a suitable range to achieve match hits.
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
@@ -889,7 +855,8 @@ The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv
Time series data can be imported via any supported ingestion protocol:
* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). See [these docs](#prometheus-setup) for details.
* Influx line protocol. See [these docs](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for details.
* DataDog `submit metrics` API. See [these docs](#how-to-send-data-from-datadog-agent) for details.
* InfluxDB line protocol. See [these docs](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for details.
* Graphite plaintext protocol. See [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details.
* OpenTSDB telnet put protocol. See [these docs](#sending-data-via-telnet-put-protocol) for details.
* OpenTSDB http `/api/put` protocol. See [these docs](#sending-opentsdb-data-via-http-apiput-requests) for details.
@@ -1068,14 +1035,7 @@ Example contents for `-relabelConfig` file:
regex: true
```
VictoriaMetrics provides the following extra actions for relabeling rules:
* `replace_all`: replaces all the occurences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
* `labelmap_all`: replaces all the occurences of `regex` in all the label names with the `replacement`.
* `keep_if_equal`: keeps the entry if all label values from `source_labels` are equal.
* `drop_if_equal`: drops the entry if all the label values from `source_labels` are equal.
See also [relabeling in vmagent](https://docs.victoriametrics.com/vmagent.html#relabeling).
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details about relabeling in VictoriaMetrics.
## Federation
@@ -1094,7 +1054,7 @@ with scrape intervals exceeding `5m`.
VictoriaMetrics uses lower amounts of CPU, RAM and storage space on production workloads compared to competing solutions (Prometheus, Thanos, Cortex, TimescaleDB, InfluxDB, QuestDB, M3DB) according to [our case studies](https://docs.victoriametrics.com/CaseStudies.html).
VictoriaMetrics capacity scales linearly with the available resources. The needed amounts of CPU and RAM highly depends on the workload - the number of active time series, series churn rate, query types, query qps, etc. It is recommended setting up a test VictoriaMetrics for your production workload and iteratively scaling CPU and RAM resources until it becomes stable according to [troubleshooting docs](#troubleshooting). A single-node VictoriaMetrics works perfectly with the following production workload according to [our case studies](https://docs.victoriametrics.com/CaseStudies.html):
VictoriaMetrics capacity scales linearly with the available resources. The needed amounts of CPU and RAM highly depend on the workload - the number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series), series [churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate), query types, query qps, etc. It is recommended setting up a test VictoriaMetrics for your production workload and iteratively scaling CPU and RAM resources until it becomes stable according to [troubleshooting docs](#troubleshooting). A single-node VictoriaMetrics works perfectly with the following production workload according to [our case studies](https://docs.victoriametrics.com/CaseStudies.html):
* Ingestion rate: 1.5+ million samples per second
* Active time series: 50+ million
@@ -1203,6 +1163,7 @@ There is no downsampling support at the moment, but:
in [this article](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
for details.
* The downsampling doesn't improve query performance on a long time range if the time range contains big number of time series due to [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). The query performance depends on the number of unique time series on the selected time range, while downsampling doesn't reduce the number of unique time series in the database - it can reduce only the number of samples per each time series.
These properties reduce the need of downsampling. We plan to implement downsampling in the future.
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.
@@ -1214,7 +1175,7 @@ only a single data point out of 20 initial data points per each 5m interval.
## Multi-tenancy
Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead.
Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) instead.
## Scalability and cluster version
@@ -1225,7 +1186,7 @@ such as Thanos, Uber M3, InfluxDB or TimescaleDB. See [vertical scalability benc
So try single-node VictoriaMetrics at first and then [switch to cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) if you still need
horizontally scalable long-term remote storage for really large Prometheus deployments.
[Contact us](mailto:info@victoriametrics.com) for paid support.
[Contact us](mailto:info@victoriametrics.com) for enterprise support.
## Alerting
@@ -1251,6 +1212,7 @@ Consider setting the following command-line flags:
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
* `-forceMergeAuthKey` for protecting `/internal/force_merge` endpoint. See [force merge docs](#forced-merge).
* `-search.resetCacheAuthKey` for protecting `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
* `-configAuthKey` for protecting `/config` endpoint, since it may contain sensitive information such as passwords.
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
@@ -1294,18 +1256,18 @@ It is recommended setting up alerts in [vmalert](https://docs.victoriametrics.co
The most interesting metrics are:
* `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour
aka active time series.
* `increase(vm_new_timeseries_created_total[1h])` - time series churn rate during the previous hour.
aka [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series).
* `increase(vm_new_timeseries_created_total[1h])` - time series [churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) during the previous hour.
* `sum(vm_rows{type=~"storage/.*"})` - total number of `(timestamp, value)` data points in the database.
* `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted into the database per second.
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
* `sum(vm_data_size_bytes)` - the total size of data on disk.
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
of the current number of active time series.
of the current number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series).
* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
of the current number of active time series.
of the current number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series).
VictoriaMetrics also exposes currently running queries with their execution times at `/api/v1/status/active_queries` page.
@@ -1325,8 +1287,8 @@ VictoriaMetrics returns TSDB stats at `/api/v1/status/tsdb` page in the way simi
By default VictoriaMetrics doesn't limit the number of stored time series. The limit can be enforced by setting the following command-line flags:
* `-storage.maxHourlySeries` - limits the number of time series that can be added during the last hour. Useful for limiting the number of active time series.
* `-storage.maxDailySeries` - limits the number of time series that can be added during the last day. Useful for limiting daily churn rate.
* `-storage.maxHourlySeries` - limits the number of time series that can be added during the last hour. Useful for limiting the number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series).
* `-storage.maxDailySeries` - limits the number of time series that can be added during the last day. Useful for limiting daily [churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
Both limits can be set simultaneously. If any of these limits is reached, then incoming samples for new time series are dropped. A sample of dropped series is put in the log with `WARNING` level.
@@ -1337,6 +1299,8 @@ The exceeded limits can be [monitored](#monitoring) with the following metrics:
These limits are approximate, so VictoriaMetrics can underflow/overflow the limit by a small percentage (usually less than 1%).
See also more advanced [cardinality limiter in vmagent](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter).
## Troubleshooting
@@ -1367,7 +1331,7 @@ These limits are approximate, so VictoriaMetrics can underflow/overflow the limi
See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
then it is likely you have too many active time series for the current amount of RAM.
then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-active-time-series) for the current amount of RAM.
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics such as `vm_slow_row_inserts_total` and `vm_slow_metric_name_loads_total`, which could be used
as an indicator of low amounts of RAM. It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
ingestion and query performance in this case.
@@ -1394,7 +1358,7 @@ These limits are approximate, so VictoriaMetrics can underflow/overflow the limi
It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
* Metrics and labels leading to high cardinality or high churn rate can be determined at `/api/v1/status/tsdb` page. See [these docs](#tsdb-stats) for details.
* Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined at `/api/v1/status/tsdb` page. See [these docs](#tsdb-stats) for details.
* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics.
@@ -1404,6 +1368,7 @@ These limits are approximate, so VictoriaMetrics can underflow/overflow the limi
* If you store Graphite metrics like `foo.bar.baz` in VictoriaMetrics, then use `{__graphite__="foo.*.baz"}` syntax for selecting such metrics.
This expression is equivalent to `{__name__=~"foo[.][^.]*[.]baz"}`, but it works faster and it is easier to use when migrating from Graphite.
See also [label_graphite_group](https://docs.victoriametrics.com/MetricsQL.html#label_graphite_group) function, which allows extracting the given groups from Graphite metric names.
* VictoriaMetrics ignores `NaN` values during data ingestion.
@@ -1420,6 +1385,7 @@ Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It
* From Prometheus to VictoriaMetrics
* From InfluxDB to VictoriaMetrics
* From VictoriaMetrics to VictoriaMetrics
* From OpenTSDB to VictoriaMetrics
See [vmctl docs](https://docs.victoriametrics.com/vmctl.html) for more details.
@@ -1463,7 +1429,8 @@ See also [high availability docs](#high-availability) and [backup docs](#backups
VictoriaMetrics supports backups via [vmbackup](https://docs.victoriametrics.com/vmbackup.html)
and [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
We also provide `vmbackupmanager` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
We also provide [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool for enterprise subscribers.
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
## Profiling
@@ -1548,7 +1515,7 @@ Report bugs and propose new features [here](https://github.com/VictoriaMetrics/V
## VictoriaMetrics Logo
[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
[Zip](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
Files included in each folder:
@@ -1583,8 +1550,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
```
-bigMergeConcurrency int
The maximum number of CPU cores to use for big merges. Default value is used if set to 0
-configAuthKey string
Authorization key for accessing /config page. It must be passed via authKey query arg
-csvTrimTimestamp duration
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-dedup.minScrapeInterval duration
Leave only the first sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication for details
-deleteAuthKey string
@@ -1636,18 +1608,18 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
Supports an array of values separated by comma or specified via multiple flags.
-influx.maxLineSize size
The maximum size in bytes for a single Influx line during parsing
The maximum size in bytes for a single InfluxDB line during parsing
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
-influxListenAddr string
TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8189 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
-influxMeasurementFieldSeparator string
Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol (default "_")
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
-influxSkipMeasurement
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
-influxSkipSingleField
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field
-influxTrimTimestamp duration
Trim timestamps for Influx line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-insert.maxQueueDuration duration
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
-logNewSeries
@@ -1679,7 +1651,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics. It overrides httpAuth settings
Auth key for /metrics. It must be passed via authKey query arg. It overrides httpAuth.* settings
-opentsdbHTTPListenAddr string
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
-opentsdbListenAddr string
@@ -1692,7 +1664,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-opentsdbhttpTrimTimestamp duration
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-pprofAuthKey string
Auth key for /debug/pprof. It overrides httpAuth settings
Auth key for /debug/pprof. It must be passed via authKey query arg. It overrides httpAuth.* settings
-precisionBits int
The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64)
-promscrape.cluster.memberNum int
@@ -1747,11 +1719,21 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config for details (default 30s)
-promscrape.maxDroppedTargets int
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
-promscrape.maxResponseHeadersSize size
The maximum size of http response headers from Prometheus scrape targets
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
-promscrape.maxScrapeSize size
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
-promscrape.minResponseSizeForStreamParse size
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
-promscrape.noStaleMarkers
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
-promscrape.openstackSDCheckInterval duration
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s)
-promscrape.seriesLimitPerTarget int
Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info
-promscrape.streamParse
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
-promscrape.suppressDuplicateScrapeTargetErrors
@@ -1759,14 +1741,16 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-promscrape.suppressScrapeErrors
Whether to suppress scrape errors logging. The last error for each target is always available at '/targets' page even if scrape errors logging is suppressed
-relabelConfig string
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. See https://docs.victoriametrics.com/#relabeling for details
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
-relabelDebug
Whether to log metrics before and after relabeling with -relabelConfig. If the -relabelDebug is enabled, then the metrics aren't sent to storage. This is useful for debugging the relabeling configs
-retentionPeriod value
Data with timestamps outside the retentionPeriod is automatically deleted
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
-search.cacheTimestampOffset duration
The maximum duration since the current time for response data, which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses due to time synchronization issues between VictoriaMetrics and data sources (default 5m0s)
The maximum duration since the current time for response data, which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses due to time synchronization issues between VictoriaMetrics and data sources. See also -search.disableAutoCacheReset (default 5m0s)
-search.disableAutoCacheReset
Whether to disable automatic response cache reset if a sample with timestamp outside -search.cacheTimestampOffset is inserted into VictoriaMetrics
-search.disableCache
Whether to disable response caching. This may be useful during data backfilling
-search.latencyOffset duration
@@ -1808,6 +1792,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of unique time series each search can scan. This option allows limiting memory usage (default 300000)
-search.minStalenessInterval duration
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
-search.noStaleMarkers
Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
-search.queryStats.lastQueriesCount int
Query stats for /api/v1/status/top_queries is tracked on this number of last queries. Zero value disables query stats tracking (default 20000)
-search.queryStats.minQueryDuration duration
@@ -1832,6 +1818,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries
-storage.maxHourlySeries int
The maximum number of unique series can be added to the storage during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -storage.maxDailySeries
-storage.minFreeDiskSpaceBytes size
The minimum free disk space at -storageDataPath after which the storage stops accepting new data
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 10000000)
-storageDataPath string
Path to storage data (default "victoria-metrics-data")
-tls

View File

@@ -93,7 +93,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"/vmui", "Web UI"},
{"/targets", "discovered targets list"},
{"/api/v1/targets", "advanced information about discovered targets in JSON format"},
{"/config", "-promscrape.config contents"},
{"/metrics", "available service metrics"},
{"/flags", "command-line flags"},
{"/api/v1/status/tsdb", "tsdb status page"},
{"/api/v1/status/top_queries", "top queries"},
{"/api/v1/status/active_queries", "active queries"},

View File

@@ -19,6 +19,7 @@
["{TIME_S-110s}","3"],
["{TIME_S-100s}","3"],
["{TIME_S-90s}","3"],
["{TIME_S-80s}","3"],
["{TIME_S-60s}","2"],
["{TIME_S-50s}","2"],
["{TIME_S-40s}","2"],

View File

@@ -17,17 +17,19 @@ to `vmagent` such as the ability to push metrics instead of pulling them. We did
## Features
* Can be used as a drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
See [Quick Start](#quick-start) for details.
* Can be used as a drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter). See [Quick Start](#quick-start) for details.
* Can read data from Kafka. See [these docs](#reading-metrics-from-kafka).
* Can write data to Kafka. See [these docs](#writing-metrics-to-kafka).
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
* Accepts data via all ingestion protocols supported by VictoriaMetrics:
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
* DataDog "submit metrics" API. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-datadog-agent).
* InfluxDB line protocol via `http://<vmagent>:8429/write`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-opentsdb-compatible-agents).
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-data-in-json-line-format).
* Native data import protocol via `http://<vmagent>:8429/api/v1/import/native`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-data-in-native-format).
* Data in Prometheus exposition format. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-data-in-prometheus-exposition-format) for details.
* Prometheus exposition format via `http://<vmagent>:8429/api/v1/import/prometheus`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-data-in-prometheus-exposition-format) for details.
* Arbitrary CSV data via `http://<vmagent>:8429/api/v1/import/csv`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-csv-data).
* Can replicate collected metrics simultaneously to multiple remote storage systems.
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
@@ -36,8 +38,8 @@ to `vmagent` such as the ability to push metrics instead of pulling them. We did
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared with Prometheus.
* Scrape targets can be spread among multiple `vmagent` instances when big number of targets must be scraped. See [these docs](#scraping-big-number-of-targets).
* Can efficiently scrape targets that expose millions of time series such as [/federate endpoint in Prometheus](https://prometheus.io/docs/prometheus/latest/federation/). See [these docs](#stream-parsing-mode).
* Can deal with high cardinality and high churn rate issues by limiting the number of unique time series sent to remote storage systems. See [these docs](#cardinality-limiter).
* Can deal with [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues by limiting the number of unique time series at scrape time and before sending them to remote storage systems. See [these docs](#cardinality-limiter).
* Can load scrape configs from multiple files. See [these docs](#loading-scrape-configs-from-multiple-files).
## Quick Start
@@ -53,13 +55,13 @@ Example command line:
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
If you only need to collect Influx data, then the following command is sufficient:
If you only need to collect InfluxDB data, then the following command is sufficient:
```
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
Then send Influx data to `http://vmagent-host:8429`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
Then send InfluxDB data to `http://vmagent-host:8429`. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags).
@@ -196,7 +198,12 @@ Please file feature requests to [our issue tracker](https://github.com/VictoriaM
to save network bandwidth.
* `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis.
By default, `vmagent` uses keep-alive connections to scrape targets to reduce overhead on connection re-establishing.
* `series_limit: N` - for limiting the number of unique time series a single scrape target can expose. See [these docs](#cardinality-limiter).
* `stream_parse: true` - for scraping targets in a streaming manner. This may be useful for targets exporting big number of metrics. See [these docs](#stream-parsing-mode).
* `scrape_align_interval: duration` - for aligning scrapes to the given interval instead of using random offset in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps spreading scrapes evenly in time.
* `scrape_offset: duration` - for specifying the exact offset for scraping instead of using random offset in the range `[0 ... scrape_interval]`.
* `relabel_debug: true` - for enabling debug logging during relabeling of the discovered targets. See [these docs](#relabeling).
* `metric_relabel_debug: true` - for enabling debug logging during relabeling of the scraped metrics. See [these docs](#relabeling).
Note that `vmagent` doesn't support `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
@@ -205,6 +212,30 @@ entries to 60s. Run `vmagent -help` in order to see default values for the `-pro
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders which are substituted by the corresponding `ENV_VAR` environment variable values.
## Loading scrape configs from multiple files
`vmagent` supports loading scrape configs from multiple files specified in the `scrape_config_files` section of the `-promscrape.config` file. For example, the following `-promscrape.config` instructs `vmagent` to load scrape configs from all the `*.yml` files under the `configs` directory plus a `single_scrape_config.yml` file:
```yml
scrape_config_files:
- configs/*.yml
- single_scrape_config.yml
```
Every referenced file can contain an arbitrary number of any [supported scrape configs](#how-to-collect-metrics-in-prometheus-format). There is no need to specify the top-level `scrape_configs` section in these files. For example:
```yml
- job_name: foo
static_configs:
- targets: ["vmagent:8429"]
- job_name: bar
kubernetes_sd_configs:
- role: pod
```
`vmagent` dynamically reloads these files on `SIGHUP` signal or on the request to `http://vmagent:8429/-/reload`.
## Adding labels to metrics
Labels can be added to metrics by the following mechanisms:
@@ -219,13 +250,30 @@ Labels can be added to metrics by the following mechanisms:
## Relabeling
`vmagent` supports [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
and also provides the following actions:
`vmagent` and VictoriaMetrics support Prometheus-compatible relabeling.
They provide the following additional actions on top of actions from the [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config):
* `replace_all`: replaces all of the occurrences of `regex` in the values of `source_labels` with the `replacement` and stores the results in the `target_label`.
* `labelmap_all`: replaces all of the occurrences of `regex` in all the label names with the `replacement`.
* `keep_if_equal`: keeps the entry if all the label values from `source_labels` are equal.
* `drop_if_equal`: drops the entry if all the label values from `source_labels` are equal.
* `keep_metrics`: keeps all the metrics with names matching the given `regex`.
* `drop_metrics`: drops all the metrics with names matching the given `regex`.
The `regex` value can be split into multiple lines for improved readability and maintainability. These lines are automatically joined with `|` char when parsed. For example, the following configs are equivalent:
```yaml
- action: keep_metrics
regex: "metric_a|metric_b|foo_.+"
```
```yaml
- action: keep_metrics
regex:
- "metric_a"
- "metric_b"
- "foo_.+"
```
The relabeling can be defined in the following places:
@@ -244,25 +292,46 @@ You can read more about relabeling in the following articles:
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
## Prometheus staleness markers
`vmagent` sends [Prometheus staleness markers](https://www.robustperception.io/staleness-and-promql) to `-remoteWrite.url` in the following cases:
* If they are passed to `vmagent` via [Prometheus remote_write protocol](#prometheus-remote_write-proxy).
* If the metric disappears from the list of scraped metrics, then a stale marker is sent for this particular metric.
* If the scrape target becomes temporarily unavailable, then stale markers are sent for all the metrics scraped from this target.
* If the scrape target is removed from the list of targets, then stale markers are sent for all the metrics scraped from this target.
* Stale markers are sent for all the scraped metrics on graceful shutdown of `vmagent`.
Prometheus staleness markers' tracking needs additional memory, since it must store the previous response body per each scrape target in order to compare it to the current response body. The memory usage may be reduced by passing `-promscrape.noStaleMarkers` command-line flag to `vmagent`. This disables staleness tracking. This also disables tracking the number of new time series per each scrape with the auto-generated `scrape_series_added` metric. See [these docs](https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series) for details.
## Stream parsing mode
By default `vmagent` reads the full response from scrape target into memory, then parses it, applies [relabeling](#relabeling) and then pushes the resulting metrics to the configured `-remoteWrite.url`. This mode works good for the majority of cases when the scrape target exposes small number of metrics (e.g. less than 10 thousand). But this mode may take big amounts of memory when the scrape target exposes big number of metrics. In this case it is recommended enabling stream parsing mode. When this mode is enabled, then `vmagent` reads response from scrape target in chunks, then immediately processes every chunk and pushes the processed metrics to remote storage. This allows saving memory when scraping targets that expose millions of metrics. Stream parsing mode may be enabled either globally for all of the scrape targets by passing `-promscrape.streamParse` command-line flag or on a per-scrape target basis with `stream_parse: true` option. For example:
By default `vmagent` reads the full response body from scrape target into memory, then parses it, applies [relabeling](#relabeling) and then pushes the resulting metrics to the configured `-remoteWrite.url`. This mode works well for the majority of cases when the scrape target exposes a small number of metrics (e.g. less than 10 thousand). But this mode may take big amounts of memory when the scrape target exposes a big number of metrics. In this case it is recommended to enable stream parsing mode. When this mode is enabled, then `vmagent` reads response from scrape target in chunks, then immediately processes every chunk and pushes the processed metrics to remote storage. This allows saving memory when scraping targets that expose millions of metrics.
```yml
scrape_configs:
- job_name: 'big-federate'
stream_parse: true
static_configs:
- targets:
- big-prometeus1
- big-prometeus2
honor_labels: true
metrics_path: /federate
params:
'match[]': ['{__name__!=""}']
```
Stream parsing mode is automatically enabled for scrape targets returning response bodies with sizes bigger than the `-promscrape.minResponseSizeForStreamParse` command-line flag value. Additionally, the stream parsing mode can be explicitly enabled in the following places:
Note that `sample_limit` option doesn't prevent from data push to remote storage if stream parsing is enabled because the parsed data is pushed to remote storage as soon as it is parsed.
- Via `-promscrape.streamParse` command-line flag. In this case all the scrape targets defined in the file pointed by `-promscrape.config` are scraped in stream parsing mode.
- Via `stream_parse: true` option at `scrape_configs` section. In this case all the scrape targets defined in this section are scraped in stream parsing mode.
- Via `__stream_parse__=true` label, which can be set via [relabeling](#relabeling) at `relabel_configs` section. In this case stream parsing mode is enabled for the corresponding scrape targets. Typical use case: to set the label via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets exposing big number of metrics.
Examples:
```yml
scrape_configs:
- job_name: 'big-federate'
stream_parse: true
static_configs:
- targets:
- big-prometeus1
- big-prometeus2
honor_labels: true
metrics_path: /federate
params:
'match[]': ['{__name__!=""}']
```
Note that `sample_limit` and `series_limit` options cannot be used in stream parsing mode because the parsed data is pushed to remote storage as soon as it is parsed.
## Scraping big number of targets
@@ -330,6 +399,16 @@ scrape_configs:
## Cardinality limiter
By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced in the following places:
- Via `-promscrape.seriesLimitPerTarget` command-line option. This limit is applied individually to all the scrape targets defined in the file pointed by `-promscrape.config`.
- Via `series_limit` config option at `scrape_config` section. This limit is applied individually to all the scrape targets defined in the given `scrape_config`.
- Via `__series_limit__` label, which can be set with [relabeling](#relabeling) at `relabel_configs` section. This limit is applied to the corresponding scrape targets. Typical use case: to set the limit via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets, which may expose too high number of time series.
All the scraped metrics are dropped for time series exceeding the given limit. The exceeded limit can be [monitored](#monitoring) via `promscrape_series_limit_rows_dropped_total` metric.
See also `sample_limit` option at [scrape_config section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`. The limit can be enforced by setting the following command-line flags:
* `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour. Useful for limiting the number of active time series.
@@ -372,10 +451,12 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
as `vmagent` establishes at least a single TCP connection per target.
* If `vmagent` uses too big amounts of memory, then the following options can help:
* Enabling stream parsing. See [these docs](#stream-parsing-mode).
* Disabling staleness tracking with `-promscrape.noStaleMarkers` option. See [these docs](#prometheus-staleness-markers).
* Enabling stream parsing mode if `vmagent` scrapes targets with millions of metrics per target. See [these docs](#stream-parsing-mode).
* Reducing the number of output queues with `-remoteWrite.queues` command-line option.
* Reducing the amounts of RAM vmagent can use for in-memory buffering with `-memory.allowedPercent` or `-memory.allowedBytes` command-line option. Another option is to reduce memory limits in Docker and/or Kubernetes if `vmagent` runs under these systems.
* Reducing the number of CPU cores vmagent can use by passing `GOMAXPROCS=N` environment variable to `vmagent`, where `N` is the desired limit on CPU cores. Another option is to reduce CPU limits in Docker or Kubernetes if `vmagent` runs under these systems.
* Passing `-promscrape.dropOriginalLabels` command-line option to `vmagent`, so it drops `"discoveredLabels"` and `"droppedTargets"` lists at `/api/v1/targets` page. This reduces memory usage when scraping big number of targets at the cost of reduced debuggability for improperly configured per-target relabeling.
* When `vmagent` scrapes many unreliable targets, it can flood the error log with scrape errors. These errors can be suppressed
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`
@@ -384,21 +465,13 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
* The `/api/v1/targets` page could be useful for debugging relabeling process for scrape targets.
This page contains original labels for targets dropped during relabeling (see "droppedTargets" section in the page output). By default the `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling, then increase `-promscrape.maxDroppedTargets` command-line flag value to see all the dropped targets. Note that tracking each dropped target requires up to 10Kb of RAM. Therefore big values for `-promscrape.maxDroppedTargets` may result in increased memory usage if a big number of scrape targets are dropped during relabeling.
* If `vmagent` scrapes a big number of targets then the `-promscrape.dropOriginalLabels` command-line option may be passed to `vmagent` in order to reduce memory usage.
This option drops `"discoveredLabels"` and `"droppedTargets"` lists at `/api/v1/targets` page, which may result in reduced debuggability for improperly configured per-target relabeling.
* We recommend you increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page grows constantly. It is also recommended increasing `-remoteWrite.maxBlockSize` and `-remoteWrite.maxRowsPerBlock` command-line options in this case. This can improve data ingestion performance to the configured remote storage systems at the cost of higher memory usage.
* If `vmagent` scrapes targets with millions of metrics per target (for example, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)),
we recommend enabling [stream parsing mode](#stream-parsing-mode) in order to reduce memory usage during scraping.
* We recommend you increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page grows constantly.
* If you see gaps in the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, try increasing `-remoteWrite.queues`.
Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage. Therefore it starts dropping the buffered data
if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
* If you see gaps in the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, try increasing `-remoteWrite.queues`. Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage. Therefore it starts dropping the buffered data if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
* `vmagent` drops data blocks if remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses. The number of dropped blocks can be monitored via `vmagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
* Use `-remoteWrite.queues=1` when `-remoteWrite.url` points to remote storage, which doesn't accept out-of-order samples (aka data backfilling). Such storage systems include Prometheus, Cortex and Thanos.
* Use `-remoteWrite.queues=1` when `-remoteWrite.url` points to remote storage, which doesn't accept out-of-order samples (aka data backfilling). Such storage systems include Prometheus, Cortex and Thanos, which typically emit `out of order sample` errors. The best solution is to use remote storage with [backfilling support](https://docs.victoriametrics.com/#backfilling).
* `vmagent` buffers scraped data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
@@ -445,6 +518,108 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
regex: true
```
## Kafka integration
[Enterprise version](https://victoriametrics.com/enterprise.html) of `vmagent` can read and write metrics from / to Kafka:
* [Reading metrics from Kafka](#reading-metrics-from-kafka)
* [Writing metrics to Kafka](#writing-metrics-to-kafka)
The enterprise version of vmagent is available for evaluation at [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page in `vmutils-*-enterprise.tar.gz` archives and in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
### Reading metrics from Kafka
[Enterprise version](https://victoriametrics.com/enterprise.html) of `vmagent` can read metrics in various formats from Kafka messages. These formats can be configured with `-kafka.consumer.topic.defaultFormat` or `-kafka.consumer.topic.format` command-line options. The following formats are supported:
* `promremotewrite` - [Prometheus remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). Messages in this format can be sent by vmagent - see [these docs](#writing-metrics-to-kafka).
* `influx` - [InfluxDB line protocol format](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/).
* `prometheus` - [Prometheus text exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format) and [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md).
* `graphite` - [Graphite plaintext format](https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol).
* `jsonline` - [JSON line format](https://docs.victoriametrics.com/#how-to-import-data-in-json-line-format).
Every Kafka message may contain multiple lines in `influx`, `prometheus`, `graphite` and `jsonline` format delimited by `\n`.
`vmagent` consumes messages from Kafka topics specified by `-kafka.consumer.topic` command-line flag. Multiple topics can be specified by passing multiple `-kafka.consumer.topic` command-line flags to `vmagent`.
`vmagent` consumes messages from Kafka brokers specified by `-kafka.consumer.topic.brokers` command-line flag. Multiple brokers can be specified per each `-kafka.consumer.topic` by passing a list of brokers delimited by `;`. For example, `-kafka.consumer.topic.brokers=host1:9092;host2:9092`.
The following command starts `vmagent`, which reads metrics in InfluxDB line protocol format from Kafka broker at `localhost:9092` from the topic `metrics-by-telegraf` and sends them to remote storage at `http://localhost:8428/api/v1/write`:
```bash
./bin/vmagent -remoteWrite.url=http://localhost:8428/api/v1/write \
-kafka.consumer.topic.brokers=localhost:9092 \
-kafka.consumer.topic.format=influx \
-kafka.consumer.topic=metrics-by-telegraf \
-kafka.consumer.topic.groupID=some-id
```
It is expected that [Telegraf](https://github.com/influxdata/telegraf) sends metrics to the `metrics-by-telegraf` topic with the following config:
```yaml
[[outputs.kafka]]
brokers = ["localhost:9092"]
topic = "metrics-by-telegraf"
data_format = "influx"
```
#### Command-line flags for Kafka consumer
These command-line flags are available only in [enterprise](https://victoriametrics.com/enterprise.html) version of `vmagent`, which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page (see `vmutils-*-enterprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
```
-kafka.consumer.topic array
Kafka topic names for data consumption.
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.basicAuth.password array
Optional basic auth password for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN'
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.basicAuth.username array
Optional basic auth username for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN'
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.brokers array
List of brokers to connect for given topic, e.g. -kafka.consumer.topic.brokers=host-1:9092;host-2:9092
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.defaultFormat string
Expected data format in the topic if -kafka.consumer.topic.format is skipped. (default "promremotewrite")
-kafka.consumer.topic.format array
data format for corresponding kafka topic. Valid formats: influx, prometheus, promremotewrite, graphite, jsonline
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.groupID array
Defines group.id for topic
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.isGzipped array
Enables gzip setting for topic messages payload. Only prometheus, jsonline and influx formats accept gzipped messages.
Supports array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.options array
Optional key=value;key1=value2 settings for topic consumer. See full configuration options at https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md.
Supports an array of values separated by comma or specified via multiple flags.
```
### Writing metrics to Kafka
[Enterprise version](https://victoriametrics.com/enterprise.html) of `vmagent` writes data to Kafka with `at-least-once` semantics if `-remoteWrite.url` contains a Kafka URL. For example, if `vmagent` is started with `-remoteWrite.url=kafka://localhost:9092/?topic=prom-rw`, then it would send Prometheus remote_write messages to Kafka bootstrap server at `localhost:9092` with the topic `prom-rw`. These messages can be read later from Kafka by another `vmagent` - see [these docs](#reading-metrics-from-kafka) for details.
Additional Kafka options can be passed as query params to `-remoteWrite.url`. For instance, `kafka://localhost:9092/?topic=prom-rw&client.id=my-favorite-id` sets `client.id` Kafka option to `my-favorite-id`. The full list of Kafka options is available [here](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md).
#### Kafka broker authorization and authentication
Two types of auth are supported:
* sasl with username and password:
```bash
./bin/vmagent -remoteWrite.url='kafka://localhost:9092/?topic=prom-rw&security.protocol=SASL_SSL&sasl.mechanisms=PLAIN' -remoteWrite.basicAuth.username=user -remoteWrite.basicAuth.password=password
```
* tls certificates:
```bash
./bin/vmagent -remoteWrite.url='kafka://localhost:9092/?topic=prom-rw&security.protocol=SSL' -remoteWrite.tlsCAFile=/opt/ca.pem -remoteWrite.tlsCertFile=/opt/cert.pem -remoteWrite.tlsKeyFile=/opt/key.pem
```
## How to build from sources
@@ -525,8 +700,13 @@ vmagent collects metrics data via popular data ingestion protocols and routes th
See the docs at https://docs.victoriametrics.com/vmagent.html .
-configAuthKey string
Authorization key for accessing /config page. It must be passed via authKey query arg
-csvTrimTimestamp duration
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-dryRun
Whether to check only config files without running vmagent. The following files are checked: -promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse
-enableTCP6
@@ -566,18 +746,18 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
Supports an array of values separated by comma or specified via multiple flags.
-influx.maxLineSize size
The maximum size in bytes for a single Influx line during parsing
The maximum size in bytes for a single InfluxDB line during parsing
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
-influxListenAddr string
TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8189 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
-influxMeasurementFieldSeparator string
Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol (default "_")
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
-influxSkipMeasurement
Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'
-influxSkipSingleField
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if Influx line contains only a single field
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metric name if InfluxDB line contains only a single field
-influxTrimTimestamp duration
Trim timestamps for Influx line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-insert.maxQueueDuration duration
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
-loggerDisableTimestamps
@@ -605,7 +785,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics. It overrides httpAuth settings
Auth key for /metrics. It must be passed via authKey query arg. It overrides httpAuth.* settings
-opentsdbHTTPListenAddr string
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
-opentsdbListenAddr string
@@ -618,7 +798,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-opentsdbhttpTrimTimestamp duration
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-pprofAuthKey string
Auth key for /debug/pprof. It overrides httpAuth settings
Auth key for /debug/pprof. It must be passed via authKey query arg. It overrides httpAuth.* settings
-promscrape.cluster.memberNum int
The number of this member in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster
-promscrape.cluster.membersCount int
@@ -671,11 +851,21 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config for details (default 30s)
-promscrape.maxDroppedTargets int
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
-promscrape.maxResponseHeadersSize size
The maximum size of http response headers from Prometheus scrape targets
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
-promscrape.maxScrapeSize size
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
-promscrape.minResponseSizeForStreamParse size
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
-promscrape.noStaleMarkers
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
-promscrape.openstackSDCheckInterval duration
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s)
-promscrape.seriesLimitPerTarget int
Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info
-promscrape.streamParse
Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control
-promscrape.suppressDuplicateScrapeTargetErrors
@@ -703,15 +893,17 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.maxBlockSize size
The maximum size in bytes of unpacked request to send to remote storage. It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics
The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608)
-remoteWrite.maxDailySeries int
The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -remoteWrite.maxHourlySeries
The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
-remoteWrite.maxDiskUsagePerURL size
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. Disk usage is unlimited if the value is set to 0
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-remoteWrite.maxHourlySeries int
The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -remoteWrite.maxDailySeries
The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
-remoteWrite.maxRowsPerBlock int
The maximum number of samples to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize (default 10000)
-remoteWrite.multitenantURL array
Base path for multitenant remote storage URL to write data to. See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url
Supports an array of values separated by comma or specified via multiple flags.

View File

@@ -0,0 +1,99 @@
package datadog
import (
"fmt"
"net/http"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
// Metrics describing DataDog ingestion; all of them are updated at the end of insertRows.
var (
// rowsInserted is increased by the number of points pushed by each insertRows call.
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="datadog"}`)
// rowsTenantInserted is increased by the same amount, but only when an auth token is present.
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="datadog"}`)
// rowsPerInsert records the number of points handled by a single insertRows call.
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="datadog"}`)
)
// InsertHandlerForHTTP handles a DataDog `POST /api/v1/series` request.
//
// Extra labels are obtained from req via parserCommon.GetExtraLabels and are
// attached to every series parsed from the request body. The body is parsed
// under the write concurrency limiter, and every parsed batch of series is
// forwarded to insertRows together with the (possibly nil) auth token at.
//
// An error is returned if the extra labels cannot be obtained, or if the
// limiter, the parser or insertRows report an error.
//
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
	extraLabels, err := parserCommon.GetExtraLabels(req)
	if err != nil {
		return err
	}

	// The parser receives the Content-Encoding header value alongside the body.
	contentEncoding := req.Header.Get("Content-Encoding")
	pushSeries := func(series []parser.Series) error {
		return insertRows(at, series, extraLabels)
	}
	return writeconcurrencylimiter.Do(func() error {
		return parser.ParseStream(req.Body, contentEncoding, pushSeries)
	})
}
// insertRows converts the given DataDog series into Prometheus time series
// and hands them to remotewrite.PushWithAuthToken.
//
// Every series gets a `__name__` label holding the metric name, a `host` label
// holding the series host, one label per `name:value` tag and finally all of
// extraLabels. at may be nil; per-tenant metrics are updated only when it is set.
//
// An error is returned, and nothing is pushed, if any tag lacks a ':' separator.
func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
	ctx := common.GetPushCtx()
	defer common.PutPushCtx(ctx)

	// Reuse the buffers kept in ctx; they are stored back into ctx before pushing.
	tss := ctx.WriteRequest.Timeseries[:0]
	labels := ctx.Labels[:0]
	samples := ctx.Samples[:0]
	rowsTotal := 0

	for idx := range series {
		s := &series[idx]

		labelsStart := len(labels)
		labels = append(labels,
			prompbmarshal.Label{Name: "__name__", Value: s.Metric},
			prompbmarshal.Label{Name: "host", Value: s.Host},
		)
		for _, tag := range s.Tags {
			sep := strings.IndexByte(tag, ':')
			if sep < 0 {
				return fmt.Errorf("cannot find ':' in tag %q", tag)
			}
			label := prompbmarshal.Label{
				Name:  tag[:sep],
				Value: tag[sep+1:],
			}
			if label.Name == "host" {
				// Keep a `host` tag distinct from the `host` label set from s.Host above.
				label.Name = "exported_host"
			}
			labels = append(labels, label)
		}
		labels = append(labels, extraLabels...)

		samplesStart := len(samples)
		for _, pt := range s.Points {
			samples = append(samples, prompbmarshal.Sample{
				Timestamp: pt.Timestamp(),
				Value:     pt.Value(),
			})
		}
		rowsTotal += len(s.Points)

		// The time series references only the labels and samples appended for this series.
		tss = append(tss, prompbmarshal.TimeSeries{
			Labels:  labels[labelsStart:],
			Samples: samples[samplesStart:],
		})
	}

	ctx.WriteRequest.Timeseries = tss
	ctx.Labels = labels
	ctx.Samples = samples
	remotewrite.PushWithAuthToken(at, &ctx.WriteRequest)

	rowsInserted.Add(rowsTotal)
	if at != nil {
		rowsTenantInserted.Get(at).Add(rowsTotal)
	}
	rowsPerInsert.Update(float64(rowsTotal))
	return nil
}

View File

@@ -21,8 +21,8 @@ import (
)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field")
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
)
@@ -35,9 +35,9 @@ var (
// InsertHandlerForReader processes remote write for influx line protocol.
//
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
func InsertHandlerForReader(r io.Reader) error {
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, false, "", "", func(db string, rows []parser.Row) error {
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
return insertRows(nil, db, rows, nil)
})
})

View File

@@ -3,6 +3,7 @@ package main
import (
"flag"
"fmt"
"io"
"net/http"
"os"
"strings"
@@ -10,6 +11,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
@@ -41,13 +43,14 @@ var (
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
"Unknown config entries are allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse")
@@ -93,7 +96,9 @@ func main() {
common.StartUnmarshalWorkers()
writeconcurrencylimiter.Init()
if len(*influxListenAddr) > 0 {
influxServer = influxserver.MustStart(*influxListenAddr, influx.InsertHandlerForReader)
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
return influx.InsertHandlerForReader(r, false)
})
}
if len(*graphiteListenAddr) > 0 {
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
@@ -155,7 +160,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.WriteAPIHelp(w, [][2]string{
{"/targets", "discovered targets list"},
{"/api/v1/targets", "advanced information about discovered targets in JSON format"},
{"/config", "-promscrape.config contents"},
{"/metrics", "available service metrics"},
{"/flags", "command-line flags"},
{"/-/reload", "reload configuration"},
})
return true
@@ -221,10 +228,53 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
influxQueryRequests.Inc()
influxutils.WriteDatabaseNames(w)
return true
case "/datadog/api/v1/series":
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
datadogWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/api/v1/validate":
datadogValidateRequests.Inc()
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{"valid":true}`)
return true
case "/datadog/api/v1/check_run":
datadogCheckRunRequests.Inc()
// See https://docs.datadoghq.com/api/latest/service-checks/#submit-a-service-check
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/intake/":
datadogIntakeRequests.Inc()
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{}`)
return true
case "/targets":
promscrapeTargetsRequests.Inc()
promscrape.WriteHumanReadableTargetsStatus(w, r)
return true
case "/config":
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
return true
}
promscrapeConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
promscrape.WriteConfigData(w)
return true
case "/api/v1/targets":
promscrapeAPIV1TargetsRequests.Inc()
w.Header().Set("Content-Type", "application/json; charset=utf-8")
@@ -327,6 +377,35 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
influxQueryRequests.Inc()
influxutils.WriteDatabaseNames(w)
return true
case "datadog/api/v1/series":
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
datadogWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "datadog/api/v1/validate":
datadogValidateRequests.Inc()
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{"valid":true}`)
return true
case "datadog/api/v1/check_run":
datadogCheckRunRequests.Inc()
// See https://docs.datadoghq.com/api/latest/service-checks/#submit-a-service-check
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "datadog/intake/":
datadogIntakeRequests.Inc()
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{}`)
return true
default:
httpserver.Errorf(w, r, "unsupported multitenant path suffix: %q", p.Suffix)
return true
@@ -349,14 +428,23 @@ var (
nativeimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/native", protocol="nativeimport"}`)
nativeimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/native", protocol="nativeimport"}`)
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/write", protocol="influx"}`)
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/influx/write", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/query", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/influx/query", protocol="influx"}`)
datadogWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogValidateRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
datadogCheckRunRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
datadogIntakeRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/datadog/intake/", protocol="datadog"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
)

View File

@@ -1,6 +1,7 @@
package prometheusimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -38,6 +39,15 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
})
}
// InsertHandlerForReader processes metrics from given reader with optional gzip format
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
}, nil)
})
}
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)

View File

@@ -1,6 +1,7 @@
package promremotewrite
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -29,12 +30,21 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(tss []prompb.TimeSeries) error {
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, extraLabels)
})
})
}
// InsertHandlerForReader parses time series from r and inserts them
// on behalf of the auth token at, without any extra labels.
//
// Parsing runs under the write concurrency limiter.
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
	pushTimeseries := func(tss []prompb.TimeSeries) error {
		return insertRows(at, tss, nil)
	}
	return writeconcurrencylimiter.Do(func() error {
		return parser.ParseStream(r, pushTimeseries)
	})
}
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)

View File

@@ -67,7 +67,8 @@ type client struct {
fq *persistentqueue.FastQueue
hc *http.Client
authCfg *promauth.Config
sendBlock func(block []byte) bool
authCfg *promauth.Config
rl rateLimiter
@@ -84,7 +85,7 @@ type client struct {
stopCh chan struct{}
}
func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
authCfg, err := getAuthConfig(argIdx)
if err != nil {
logger.Panicf("FATAL: cannot initialize auth config: %s", err)
@@ -104,11 +105,11 @@ func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqu
if !strings.Contains(pURL, "://") {
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: it must start with `http://`, `https://` or `socks5://`", pURL)
}
urlProxy, err := url.Parse(pURL)
pu, err := url.Parse(pURL)
if err != nil {
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: %s", pURL, err)
}
tr.Proxy = http.ProxyURL(urlProxy)
tr.Proxy = http.ProxyURL(pu)
}
c := &client{
sanitizedURL: sanitizedURL,
@@ -121,6 +122,11 @@ func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqu
},
stopCh: make(chan struct{}),
}
c.sendBlock = c.sendBlockHTTP
return c
}
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
c.rl.perSecondLimit = int64(bytesPerSec)
@@ -143,7 +149,6 @@ func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqu
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
return c
}
func (c *client) MustStop() {
@@ -160,7 +165,7 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
if username != "" || password != "" || passwordFile != "" {
basicAuthCfg = &promauth.BasicAuthConfig{
Username: username,
Password: password,
Password: promauth.NewSecret(password),
PasswordFile: passwordFile,
}
}
@@ -174,7 +179,7 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
if clientSecretFile != "" || clientSecret != "" {
oauth2Cfg = &promauth.OAuth2Config{
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
ClientSecret: clientSecret,
ClientSecret: promauth.NewSecret(clientSecret),
ClientSecretFile: clientSecretFile,
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
@@ -237,9 +242,9 @@ func (c *client) runWorker() {
}
}
// sendBlock returns false only if c.stopCh is closed.
// sendBlockHTTP returns false only if c.stopCh is closed.
// Otherwise it tries sending the block to remote storage indefinitely.
func (c *client) sendBlock(block []byte) bool {
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.register(len(block), c.stopCh)
retryDuration := time.Second
retriesCount := 0

View File

@@ -20,16 +20,10 @@ import (
var (
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
"This option takes effect only when less than 10K data points per second are pushed to -remoteWrite.url")
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock")
maxRowsPerBlock = flag.Int("remoteWrite.maxRowsPerBlock", 10000, "The maximum number of samples to send in each block to remote storage. Higher number may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxBlockSize")
)
// the maximum number of rows to send per each block.
const maxRowsPerBlock = 10000
// the maximum number of labels to send per each block.
const maxLabelsPerBlock = 10 * maxRowsPerBlock
type pendingSeries struct {
mu sync.Mutex
wr writeRequest
@@ -153,10 +147,13 @@ func (wr *writeRequest) adjustSampleValues() {
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
tssDst := wr.tss
maxSamplesPerBlock := *maxRowsPerBlock
// Allow up to 10x of labels per each block on average.
maxLabelsPerBlock := 10 * maxSamplesPerBlock
for i := range src {
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
if len(wr.samples) >= maxRowsPerBlock || len(wr.labels) >= maxLabelsPerBlock {
if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
wr.tss = tssDst
wr.flush()
tssDst = wr.tss

View File

@@ -3,6 +3,7 @@ package remotewrite
import (
"flag"
"fmt"
"net/url"
"strconv"
"sync"
"sync/atomic"
@@ -53,9 +54,9 @@ var (
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
`Enabled sorting for labels can slow down ingestion performance a bit`)
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -remoteWrite.maxDailySeries")
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -remoteWrite.maxHourlySeries")
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
)
var (
@@ -170,28 +171,32 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
logger.Panicf("BUG: urls must be non-empty")
}
maxInmemoryBlocks := memory.Allowed() / len(urls) / maxRowsPerBlock / 100
if maxInmemoryBlocks > 400 {
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 400
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls))
for i, remoteWriteURL := range urls {
for i, remoteWriteURLRaw := range urls {
remoteWriteURL, err := url.Parse(remoteWriteURLRaw)
if err != nil {
logger.Fatalf("invalid -remoteWrite.url=%q: %s", remoteWriteURL, err)
}
sanitizedURL := fmt.Sprintf("%d:secret-url", i+1)
if at != nil {
// Construct full remote_write url for the given tenant according to https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
remoteWriteURL = fmt.Sprintf("%s/insert/%d:%d/prometheus/api/v1/write", remoteWriteURL, at.AccountID, at.ProjectID)
remoteWriteURL.Path = fmt.Sprintf("%s/insert/%d:%d/prometheus/api/v1/write", remoteWriteURL.Path, at.AccountID, at.ProjectID)
sanitizedURL = fmt.Sprintf("%s:%d:%d", sanitizedURL, at.AccountID, at.ProjectID)
}
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, at, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
}
return rwctxs
}
@@ -218,6 +223,13 @@ func Stop() {
}
}
rwctxsMap = nil
if sl := hourlySeriesLimiter; sl != nil {
sl.MustStop()
}
if sl := dailySeriesLimiter; sl != nil {
sl.MustStop()
}
}
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
@@ -262,6 +274,9 @@ func PushWithAuthToken(at *auth.Token, wr *prompbmarshal.WriteRequest) {
rctx = getRelabelCtx()
}
tss := wr.Timeseries
maxSamplesPerBlock := *maxRowsPerBlock
// Allow up to 10x of labels per each block on average.
maxLabelsPerBlock := 10 * maxSamplesPerBlock
for len(tss) > 0 {
// Process big tss in smaller blocks in order to reduce the maximum memory usage
samplesCount := 0
@@ -271,7 +286,7 @@ func PushWithAuthToken(at *auth.Token, wr *prompbmarshal.WriteRequest) {
samplesCount += len(tss[i].Samples)
labelsCount += len(tss[i].Labels)
i++
if samplesCount >= maxRowsPerBlock || labelsCount >= maxLabelsPerBlock {
if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
break
}
}
@@ -289,11 +304,7 @@ func PushWithAuthToken(at *auth.Token, wr *prompbmarshal.WriteRequest) {
}
sortLabelsIfNeeded(tssBlock)
tssBlock = limitSeriesCardinality(tssBlock)
if len(tssBlock) > 0 {
for _, rwctx := range rwctxs {
rwctx.Push(tssBlock)
}
}
pushBlockToRemoteStorages(rwctxs, tssBlock)
if rctx != nil {
rctx.reset()
}
@@ -303,6 +314,23 @@ func PushWithAuthToken(at *auth.Token, wr *prompbmarshal.WriteRequest) {
}
}
// pushBlockToRemoteStorages hands tssBlock to every remote write context in rwctxs.
//
// An empty tssBlock is a no-op. Otherwise one goroutine is started per
// context so that the Push calls run in parallel; this reduces the time
// needed for sending the data to multiple remote storage systems.
// The function returns only after every Push call has returned.
func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
	if len(tssBlock) == 0 {
		// Nothing to push
		return
	}
	var pending sync.WaitGroup
	// Register all the workers up front; each one signals completion via Done.
	pending.Add(len(rwctxs))
	for i := range rwctxs {
		go func(dst *remoteWriteCtx) {
			defer pending.Done()
			dst.Push(tssBlock)
		}(rwctxs[i])
	}
	pending.Wait()
}
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) {
if !*sortLabels {
@@ -396,17 +424,29 @@ type remoteWriteCtx struct {
relabelMetricsDropped *metrics.Counter
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
h := xxhash.Sum64([]byte(remoteWriteURL))
path := fmt.Sprintf("%s/persistent-queue/%d_%016X", *tmpDataPath, argIdx+1, h)
fq := persistentqueue.MustOpenFastQueue(path, sanitizedURL, maxInmemoryBlocks, maxPendingBytesPerURL.N)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, path, sanitizedURL), func() float64 {
func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
pqURL.Fragment = ""
h := xxhash.Sum64([]byte(pqURL.String()))
queuePath := fmt.Sprintf("%s/persistent-queue/%d_%016X", *tmpDataPath, argIdx+1, h)
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytesPerURL.N)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes())
})
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, path, sanitizedURL), func() float64 {
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetInmemoryQueueLen())
})
c := newClient(argIdx, remoteWriteURL, sanitizedURL, fq, *queues)
var c *client
switch remoteWriteURL.Scheme {
case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
}
c.init(argIdx, *queues, sanitizedURL)
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
pssLen := *queues
@@ -425,7 +465,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int,
c: c,
pss: pss,
relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, path, sanitizedURL)),
relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
}
}

View File

@@ -1,6 +1,7 @@
package vmimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -31,12 +32,22 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
})
}
// InsertHandlerForReader processes metrics from given reader
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
})
})
}
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)

View File

@@ -56,6 +56,7 @@ test-vmalert:
go test -v -race -cover ./app/vmalert/datasource
go test -v -race -cover ./app/vmalert/notifier
go test -v -race -cover ./app/vmalert/config
go test -v -race -cover ./app/vmalert/remotewrite
run-vmalert: vmalert
./bin/vmalert -rule=app/vmalert/config/testdata/rules2-good.rules \

View File

@@ -21,10 +21,9 @@ implementation and aims to be compatible with its syntax.
* `vmalert` executes queries against a remote datasource, which has reliability risks because of the network.
It is recommended to configure alerts thresholds and rules expressions with understanding that network request
may fail;
* by default, rules execution is sequential within one group, but persisting of execution results to remote
storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
* by default, rules execution is sequential within one group, but persistence of execution results to remote
storage is asynchronous. Hence, user shouldn't rely on chaining of recording rules when result of previous
recording rule is reused in next one;
* `vmalert` has no UI, just an API for getting groups and rules statuses.
## QuickStart
@@ -56,7 +55,7 @@ Then configure `vmalert` accordingly:
-external.label=replica=a # Multiple external labels may be set
```
See the fill list of configuration flags in [configuration](#configuration) section.
See the full list of configuration flags in [configuration](#configuration) section.
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
@@ -86,7 +85,7 @@ name: <string>
[ concurrency: <integer> | default = 1 ]
# Optional type for expressions inside the rules. Supported values: "graphite" and "prometheus".
# By default "prometheus" rule type is used.
# By default "prometheus" type is used.
[ type: <string> ]
# Optional list of label filters applied to every rule's
@@ -95,6 +94,13 @@ name: <string>
extra_filter_labels:
[ <labelname>: <labelvalue> ... ]
# Optional list of labels added to every rule within a group.
# It has priority over the external labels.
# Labels are commonly used for adding environment
# or tenant-specific tag.
labels:
[ <labelname>: <labelvalue> ... ]
rules:
[ - <rule> ... ]
```
@@ -107,14 +113,14 @@ expression and then act according to the Rule type.
There are two types of Rules:
* [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) -
Alerting rules allows to define alert conditions via `expr` field and to send notifications
Alerting rules allow defining alert conditions via the `expr` field and sending notifications to
[Alertmanager](https://github.com/prometheus/alertmanager) if execution result is not empty.
* [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) -
Recording rules allows to define `expr` which result will be than backfilled to configured
`-remoteWrite.url`. Recording rules are used to precompute frequently needed or computationally
Recording rules allow defining an `expr` whose result will then be backfilled to the configured
`-remoteWrite.url`. Recording rules are used to precompute frequently needed or computationally
expensive expressions and save their result as a new set of time series.
`vmalert` forbids to define duplicates - rules with the same combination of name, expression and labels
`vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels
within one group.
#### Alerting rules
@@ -124,12 +130,8 @@ The syntax for alerting rule is the following:
# The name of the alert. Must be a valid metric name.
alert: <string>
# Optional type for the rule. Supported values: "graphite", "prometheus".
# By default "prometheus" rule type is used.
[ type: <string> ]
# The expression to evaluate. The expression language depends on the type value.
# By default PromQL/MetricsQL expression is used. If type="graphite", then the expression
# By default PromQL/MetricsQL expression is used. If group.type="graphite", then the expression
# must contain valid Graphite expression.
expr: <string>
@@ -160,12 +162,8 @@ The syntax for recording rules is following:
# The name of the time series to output to. Must be a valid metric name.
record: <string>
# Optional type for the rule. Supported values: "graphite", "prometheus".
# By default "prometheus" rule type is used.
[ type: <string> ]
# The expression to evaluate. The expression language depends on the type value.
# By default MetricsQL expression is used. If type="graphite", then the expression
# By default MetricsQL expression is used. If group.type="graphite", then the expression
# must contain valid Graphite expression.
expr: <string>
@@ -184,31 +182,31 @@ the process alerts state will be lost. To avoid this situation, `vmalert` should
* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or vminsert (Cluster). `vmalert` will persist alerts state
into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via remote-write protocol.
These are regular time series and may be queried from VM just as any other time series.
The state stored to the configured address on every rule evaluation.
The state is stored to the configured address on every rule evaluation.
* `-remoteRead.url` - URL to VictoriaMetrics (Single) or vmselect (Cluster). `vmalert` will try to restore alerts state
from configured address by querying time series with name `ALERTS_FOR_STATE`.
Both flags are required for the proper state restoring. Restore process may fail if time series are missing
in configured `-remoteRead.url`, weren't updated in the last `1h` (controlled by `-remoteRead.lookback`)
Both flags are required for proper state restoring. Restore process may fail if time series are missing
in configured `-remoteRead.url`, weren't updated in the last `1h` (controlled by `-remoteRead.lookback`)
or received state doesn't match current `vmalert` rules configuration.
### Multitenancy
There are the following approaches for alerting and recording rules across
The following are the approaches for alerting and recording rules across
[multiple tenants](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy):
* To run a separate `vmalert` instance per each tenant.
The corresponding tenant must be specified in `-datasource.url` command-line flag
according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format).
* To run a separate `vmalert` instance per each tenant.
The corresponding tenant must be specified in `-datasource.url` command-line flag
according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format).
For example, `/path/to/vmalert -datasource.url=http://vmselect:8481/select/123/prometheus`
would run alerts against `AccountID=123`. For recording rules the `-remoteWrite.url` command-line
flag must contain the url for the specific tenant as well.
For example, `-remoteWrite.url=http://vminsert:8480/insert/123/prometheus` would write recording
would run alerts against `AccountID=123`. For recording rules the `-remoteWrite.url` command-line
flag must contain the url for the specific tenant as well.
For example, `-remoteWrite.url=http://vminsert:8480/insert/123/prometheus` would write recording
rules to `AccountID=123`.
* To specify `tenant` parameter per each alerting and recording group if
[enterprise version of vmalert](https://victoriametrics.com/enterprise.html) is used
* To specify `tenant` parameter per each alerting and recording group if
[enterprise version of vmalert](https://victoriametrics.com/enterprise.html) is used
with `-clusterMode` command-line flag. For example:
```yaml
@@ -224,18 +222,22 @@ groups:
# Rules for accountID=456, projectID=789
```
If `-clusterMode` is enabled, then `-datasource.url`, `-remoteRead.url` and `-remoteWrite.url` must
contain only the hostname without tenant id. For example: `-datasource.url=http://vmselect:8481`.
`vmselect` automatically adds the specified tenant to urls per each recording rule in this case.
If `-clusterMode` is enabled, then `-datasource.url`, `-remoteRead.url` and `-remoteWrite.url` must
contain only the hostname without tenant id. For example: `-datasource.url=http://vmselect:8481`.
`vmalert` automatically adds the specified tenant to urls per each recording rule in this case.
The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz` files
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) and in `*-enterprise`
If `-clusterMode` is enabled and the `tenant` in a particular group is missing, then the tenant value
is obtained from `-defaultTenant.prometheus` or `-defaultTenant.graphite` depending on the `type` of the group.
The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz` files
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) and in `*-enterprise`
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags).
### WEB
### Web
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
* `http://<vmalert-addr>` - UI;
* `http://<vmalert-addr>/api/v1/groups` - list of all loaded groups and rules;
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
* `http://<vmalert-addr>/api/v1/<groupID>/<alertID>/status` - get alert status by ID.
@@ -255,7 +257,7 @@ to set `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL
## Rules backfilling
vmalert supports alerting and recording rules backfilling (aka `replay`). In replay mode vmalert
can read the same rules configuration as normally, evaluate them on the given time range and backfill
can read the same rules configuration as normal, evaluate them on the given time range and backfill
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
data source for backfilling.
@@ -301,9 +303,11 @@ max range per request: 8h20m0s
In `replay` mode all groups are executed sequentially one-by-one. Rules within the group are
executed sequentially as well (`concurrency` setting is ignored). Vmalert sends rule's expression
to [/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) endpoint
of the configured `-datasource.url`. Returned data then processed according to the rule type and
backfilled to `-remoteWrite.url` via [Remote Write protocol](https://prometheus.io/docs/prometheus/latest/storage/#remote-storage-integrations).
of the configured `-datasource.url`. Returned data is then processed according to the rule type and
backfilled to `-remoteWrite.url` via [remote Write protocol](https://prometheus.io/docs/prometheus/latest/storage/#remote-storage-integrations).
Vmalert respects `evaluationInterval` value set by flag or per-group during the replay.
Vmalert automatically disables caching on the VictoriaMetrics side by sending the `nocache=1` param. This prevents
cache pollution and unwanted adjustment of time range boundaries during backfilling.
#### Recording rules
@@ -340,6 +344,17 @@ See full description for these flags in `./vmalert --help`.
* `query` template function is disabled for performance reasons (might be changed in future);
## Monitoring
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
metrics may be analyzed later.
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
If you have suggestions for improvements or have found a bug - please open an issue on GitHub or add
a review to the dashboard.
## Configuration
Pass `-help` to `vmalert` in order to see the full list of supported
@@ -351,8 +366,14 @@ The shortlist of configuration flags is the following:
Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.
-datasource.basicAuth.password string
Optional basic auth password for -datasource.url
-datasource.basicAuth.passwordFile string
Optional path to basic auth password to use for -datasource.url
-datasource.basicAuth.username string
Optional basic auth username for -datasource.url
-datasource.bearerToken string
Optional bearer auth token to use for -datasource.url.
-datasource.bearerTokenFile string
Optional path to bearer token file to use for -datasource.url.
-datasource.lookback duration
Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
-datasource.maxIdleConnections int
@@ -373,6 +394,8 @@ The shortlist of configuration flags is the following:
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
-datasource.url string
VictoriaMetrics or vmselect url. Required parameter. E.g. http://127.0.0.1:8428
-disableAlertgroupLabel
Whether to disable adding group's name as label to generated alerts and time series.
-dryRun -rule
Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.
-enableTCP6
@@ -460,8 +483,16 @@ The shortlist of configuration flags is the following:
Auth key for /debug/pprof. It overrides httpAuth settings
-remoteRead.basicAuth.password string
Optional basic auth password for -remoteRead.url
-remoteRead.basicAuth.passwordFile string
Optional path to basic auth password to use for -remoteRead.url
-remoteRead.basicAuth.username string
Optional basic auth username for -remoteRead.url
-remoteRead.bearerToken string
Optional bearer auth token to use for -remoteRead.url.
-remoteRead.bearerTokenFile string
Optional path to bearer token file to use for -remoteRead.url.
-remoteRead.disablePathAppend
Whether to disable automatic appending of '/api/v1/query' path to the configured -remoteRead.url.
-remoteRead.ignoreRestoreErrors
Whether to ignore errors from remote storage when restoring alerts state on startup. (default true)
-remoteRead.lookback duration
@@ -477,13 +508,21 @@ The shortlist of configuration flags is the following:
-remoteRead.tlsServerName string
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used
-remoteRead.url vmalert
Optional URL to VictoriaMetrics or vmselect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
Optional URL to VictoriaMetrics or vmselect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428. See also -remoteRead.disablePathAppend
-remoteWrite.basicAuth.password string
Optional basic auth password for -remoteWrite.url
-remoteWrite.basicAuth.passwordFile string
Optional path to basic auth password to use for -remoteWrite.url
-remoteWrite.basicAuth.username string
Optional basic auth username for -remoteWrite.url
-remoteWrite.bearerToken string
Optional bearer auth token to use for -remoteWrite.url.
-remoteWrite.bearerTokenFile string
Optional path to bearer token file to use for -remoteWrite.url.
-remoteWrite.concurrency int
Defines number of writers for concurrent writing into remote querier (default 1)
-remoteWrite.disablePathAppend
Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.
-remoteWrite.flushInterval duration
Defines interval of flushes to remote write endpoint (default 5s)
-remoteWrite.maxBatchSize int
@@ -501,7 +540,7 @@ The shortlist of configuration flags is the following:
-remoteWrite.tlsServerName string
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
-remoteWrite.url string
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend
-replay.maxDatapointsPerQuery int
Max number of data points expected in one request. The higher the value, the less requests will be made during replay. (default 1000)
-replay.ruleRetryAttempts int
@@ -523,6 +562,8 @@ The shortlist of configuration flags is the following:
Supports an array of values separated by comma or specified via multiple flags.
-rule.configCheckInterval duration
Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes
-rule.maxResolveDuration duration
Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group.
-rule.validateExpressions
Whether to validate rules expressions via MetricsQL engine (default true)
-rule.validateTemplates

View File

@@ -58,7 +58,7 @@ type alertingRuleMetrics struct {
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
ar := &AlertingRule{
Type: cfg.Type,
Type: group.Type,
RuleID: cfg.ID,
Name: cfg.Alert,
Expr: cfg.Expr,
@@ -69,7 +69,7 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
GroupName: group.Name,
EvalInterval: group.Interval,
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: &cfg.Type,
DataSourceType: &group.Type,
EvaluationInterval: group.Interval,
ExtraLabels: group.ExtraFilterLabels,
}),
@@ -163,7 +163,13 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
// so the hash key will be consistent on restore
s.SetLabel(k, v)
}
// set additional labels to identify group and rule name
if ar.Name != "" {
s.SetLabel(alertNameLabel, ar.Name)
}
if !*disableAlertGroupLabel && ar.GroupName != "" {
s.SetLabel(alertGroupNameLabel, ar.GroupName)
}
a, err := ar.newAlert(s, time.Time{}, qFn) // initial alert
if err != nil {
return nil, fmt.Errorf("failed to create alert: %s", err)
@@ -178,13 +184,11 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
// if alert with For > 0
prevT := time.Time{}
//activeAt := time.Time{}
for i := range s.Values {
at := time.Unix(s.Timestamps[i], 0)
if at.Sub(prevT) > ar.EvalInterval {
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
//activeAt = at
a.Start = at
} else if at.Sub(a.Start) >= ar.For {
a.State = notifier.StateFiring
@@ -231,6 +235,14 @@ func (ar *AlertingRule) Exec(ctx context.Context) ([]prompbmarshal.TimeSeries, e
// so the hash key will be consistent on restore
m.SetLabel(k, v)
}
// set additional labels to identify group and rule name
// set additional labels to identify group and rule name
if ar.Name != "" {
m.SetLabel(alertNameLabel, ar.Name)
}
if !*disableAlertGroupLabel && ar.GroupName != "" {
m.SetLabel(alertGroupNameLabel, ar.GroupName)
}
h := hash(m)
if _, ok := updated[h]; ok {
// duplicate may be caused by extra labels
@@ -352,11 +364,6 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time, qFn notif
Start: start,
Expr: ar.Expr,
}
// label defined here to make override possible by
// time series labels.
if ar.GroupName != "" {
a.Labels[alertGroupNameLabel] = ar.GroupName
}
for _, l := range m.Labels {
// drop __name__ to be consistent with Prometheus alerting
if l.Name == "__name__" {
@@ -415,10 +422,11 @@ func (ar *AlertingRule) AlertsAPI() []*APIAlert {
}
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
return &APIAlert{
aa := &APIAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
RuleID: fmt.Sprintf("%d", ar.RuleID),
Name: a.Name,
Expression: ar.Expr,
@@ -426,8 +434,13 @@ func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
Annotations: a.Annotations,
State: a.State.String(),
ActiveAt: a.Start,
Value: strconv.FormatFloat(a.Value, 'e', -1, 64),
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(a)
}
return aa
}
const (
@@ -442,43 +455,42 @@ const (
alertStateLabel = "alertstate"
// alertGroupNameLabel defines the label name attached for generated time series.
// attaching this label may be disabled via `-disableAlertgroupLabel` flag.
alertGroupNameLabel = "alertgroup"
)
// alertToTimeSeries converts the given alert with the given timestamp to timeseries
func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp int64) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
tss = append(tss, alertToTimeSeries(ar.Name, a, timestamp))
tss = append(tss, alertToTimeSeries(a, timestamp))
if ar.For > 0 {
tss = append(tss, alertForToTimeSeries(ar.Name, a, timestamp))
tss = append(tss, alertForToTimeSeries(a, timestamp))
}
return tss
}
func alertToTimeSeries(name string, a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
func alertToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertMetricName
labels[alertNameLabel] = name
labels[alertStateLabel] = a.State.String()
return newTimeSeries([]float64{1}, []int64{timestamp}, labels)
}
// alertForToTimeSeries returns a timeseries that represents
// state of active alerts, where value is time when alert become active
func alertForToTimeSeries(name string, a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertForStateMetricName
labels[alertNameLabel] = name
return newTimeSeries([]float64{float64(a.Start.Unix())}, []int64{timestamp}, labels)
}
// Restore restores the state of active alerts basing on previously written timeseries.
// Restore restores the state of active alerts basing on previously written time series.
// Restore restores only Start field. Field State will be always Pending and supposed
// to be updated on next Exec, as well as Value field.
// Only rules with For > 0 will be restored.
@@ -506,23 +518,13 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
}
for _, m := range qMetrics {
labels := m.Labels
m.Labels = make([]datasource.Label, 0)
// drop all extra labels, so hash key will
// be identical to time series received in Exec
for _, l := range labels {
if l.Name == alertNameLabel || l.Name == alertGroupNameLabel {
continue
}
m.Labels = append(m.Labels, l)
}
a, err := ar.newAlert(m, time.Unix(int64(m.Values[0]), 0), qFn)
if err != nil {
return fmt.Errorf("failed to create alert: %w", err)
}
a.ID = hash(m)
a.State = notifier.StatePending
a.Restored = true
ar.alerts[a.ID] = a
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.Start)
}

View File

@@ -27,7 +27,6 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant",
}),
},
},
@@ -41,7 +40,6 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant extra labels",
"job": "foo",
"instance": "bar",
}),
@@ -57,7 +55,6 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant labels override",
}),
},
},
@@ -68,13 +65,11 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "for",
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
alertNameLabel: "for",
"__name__": alertForStateMetricName,
}),
},
},
@@ -85,13 +80,11 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StatePending.String(),
alertNameLabel: "for pending",
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
alertNameLabel: "for pending",
"__name__": alertForStateMetricName,
}),
},
},
@@ -109,23 +102,27 @@ func TestAlertingRule_ToTimeSeries(t *testing.T) {
func TestAlertingRule_Exec(t *testing.T) {
const defaultStep = 5 * time.Millisecond
type testAlert struct {
labels []string
alert *notifier.Alert
}
testCases := []struct {
rule *AlertingRule
steps [][]datasource.Metric
expAlerts map[uint64]*notifier.Alert
expAlerts []testAlert
}{
{
newTestAlertingRule("empty", 0),
[][]datasource.Metric{},
map[uint64]*notifier.Alert{},
nil,
},
{
newTestAlertingRule("empty labels", 0),
[][]datasource.Metric{
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
},
map[uint64]*notifier.Alert{
hash(datasource.Metric{}): {State: notifier.StateFiring},
[]testAlert{
{alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -133,8 +130,8 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -143,8 +140,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
@@ -154,8 +151,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -166,8 +163,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
@@ -179,7 +176,7 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{},
},
map[uint64]*notifier.Alert{},
nil,
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
@@ -191,8 +188,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -204,10 +201,10 @@ func TestAlertingRule_Exec(t *testing.T) {
metricWithLabels(t, "name", "foo2"),
},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateFiring},
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -220,9 +217,9 @@ func TestAlertingRule_Exec(t *testing.T) {
// 1: fire first alert
// 2: fire second alert, set first inactive
// 3: fire third alert, set second inactive, delete first one
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateInactive},
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -230,8 +227,8 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
},
},
{
@@ -240,8 +237,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
@@ -252,7 +249,7 @@ func TestAlertingRule_Exec(t *testing.T) {
// empty step to reset and delete pending alerts
{},
},
map[uint64]*notifier.Alert{},
nil,
},
{
newTestAlertingRule("for-pending=>firing=>inactive", defaultStep),
@@ -262,8 +259,8 @@ func TestAlertingRule_Exec(t *testing.T) {
// empty step to reset pending alerts
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
@@ -275,8 +272,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
},
},
{
@@ -289,8 +286,8 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
}
@@ -312,7 +309,15 @@ func TestAlertingRule_Exec(t *testing.T) {
if len(tc.rule.alerts) != len(tc.expAlerts) {
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
}
for key, exp := range tc.expAlerts {
expAlerts := make(map[uint64]*notifier.Alert)
for _, ta := range tc.expAlerts {
labels := ta.labels
labels = append(labels, alertNameLabel)
labels = append(labels, tc.rule.Name)
h := hash(metricWithLabels(t, labels...))
expAlerts[h] = ta.alert
}
for key, exp := range expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("expected to have key %d", key)
@@ -468,6 +473,11 @@ func TestAlertingRule_ExecRange(t *testing.T) {
var j int
for _, series := range tc.data {
for _, timestamp := range series.Timestamps {
a := tc.expAlerts[j]
if a.Labels == nil {
a.Labels = make(map[string]string)
}
a.Labels[alertNameLabel] = tc.rule.Name
expTS = append(expTS, tc.rule.alertToTimeSeries(tc.expAlerts[j], timestamp)...)
j++
}
@@ -496,7 +506,6 @@ func TestAlertingRule_Restore(t *testing.T) {
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
alertNameLabel, "",
),
},
map[uint64]*notifier.Alert{
@@ -509,7 +518,7 @@ func TestAlertingRule_Restore(t *testing.T) {
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
alertNameLabel, "",
alertNameLabel, "metric labels",
alertGroupNameLabel, "groupID",
"foo", "bar",
"namespace", "baz",
@@ -517,6 +526,8 @@ func TestAlertingRule_Restore(t *testing.T) {
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t,
alertNameLabel, "metric labels",
alertGroupNameLabel, "groupID",
"foo", "bar",
"namespace", "baz",
)): {State: notifier.StatePending,
@@ -528,7 +539,6 @@ func TestAlertingRule_Restore(t *testing.T) {
[]datasource.Metric{
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
"__name__", alertForStateMetricName,
alertNameLabel, "",
"foo", "bar",
"namespace", "baz",
// extra labels set by rule
@@ -645,18 +655,20 @@ func TestAlertingRule_Template(t *testing.T) {
metricWithValueAndLabels(t, 1, "instance", "bar"),
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
hash(metricWithLabels(t, alertNameLabel, "common", "region", "east", "instance", "foo")): {
Annotations: map[string]string{},
Labels: map[string]string{
"region": "east",
"instance": "foo",
alertNameLabel: "common",
"region": "east",
"instance": "foo",
},
},
hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
hash(metricWithLabels(t, alertNameLabel, "common", "region", "east", "instance", "bar")): {
Annotations: map[string]string{},
Labels: map[string]string{
"region": "east",
"instance": "bar",
alertNameLabel: "common",
"region": "east",
"instance": "bar",
},
},
},
@@ -679,20 +691,22 @@ func TestAlertingRule_Template(t *testing.T) {
metricWithValueAndLabels(t, 10, "instance", "bar"),
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
hash(metricWithLabels(t, alertNameLabel, "override label", "region", "east", "instance", "foo")): {
Labels: map[string]string{
"instance": "foo",
"region": "east",
alertNameLabel: "override label",
"instance": "foo",
"region": "east",
},
Annotations: map[string]string{
"summary": `Too high connection number for "foo" for region east`,
"description": `It is 2 connections for "foo"`,
},
},
hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
hash(metricWithLabels(t, alertNameLabel, "override label", "region", "east", "instance", "bar")): {
Labels: map[string]string{
"instance": "bar",
"region": "east",
alertNameLabel: "override label",
"instance": "bar",
"region": "east",
},
Annotations: map[string]string{
"summary": `Too high connection number for "bar" for region east`,

View File

@@ -24,13 +24,16 @@ type Group struct {
Type datasource.Type `yaml:"type,omitempty"`
File string
Name string `yaml:"name"`
Interval utils.PromDuration `yaml:"interval,omitempty"`
Interval utils.PromDuration `yaml:"interval"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
// ExtraFilterLabels is a list of label filters applied to every rule
// request within a group. It is compatible only with VM datasources.
// See https://docs.victoriametrics.com#prometheus-querying-api-enhancements
ExtraFilterLabels map[string]string `yaml:"extra_filter_labels"`
// Labels is a set of label value pairs, that will be added to every rule.
// It has priority over the external labels.
Labels map[string]string `yaml:"labels"`
// Checksum stores the hash of yaml definition for this group.
// May be used to detect any changes like rules re-ordering etc.
Checksum string
@@ -53,14 +56,6 @@ func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
if g.Type.Get() == "" {
g.Type.Set(datasource.NewPrometheusType())
}
// update rules with empty type.
for i, r := range g.Rules {
if r.Type.Get() == "" {
r.Type.Set(g.Type)
r.ID = HashRule(r)
g.Rules[i] = r
}
}
h := md5.New()
h.Write(b)
@@ -73,9 +68,6 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
if g.Name == "" {
return fmt.Errorf("group name must be set")
}
if len(g.Rules) == 0 {
return fmt.Errorf("group %q can't contain no rules", g.Name)
}
uniqueRules := map[uint64]struct{}{}
for _, r := range g.Rules {
@@ -94,9 +86,6 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
// it's needed only for tests,
// because correct types must be inherited after unmarshalling.
exprValidator := g.Type.ValidateExpr
if r.Type.Get() != "" {
exprValidator = r.Type.ValidateExpr
}
if err := exprValidator(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
}
@@ -117,7 +106,6 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
// recording rule or alerting rule.
type Rule struct {
ID uint64
Type datasource.Type `yaml:"type,omitempty"`
Record string `yaml:"record,omitempty"`
Alert string `yaml:"alert,omitempty"`
Expr string `yaml:"expr"`
@@ -159,7 +147,6 @@ func HashRule(r Rule) uint64 {
h.Write([]byte("alerting"))
h.Write([]byte(r.Alert))
}
h.Write([]byte(r.Type.Get()))
kv := sortMap(r.Labels)
for _, i := range kv {
h.Write([]byte(i.key))

View File

@@ -95,10 +95,6 @@ func TestGroup_Validate(t *testing.T) {
group: &Group{},
expErr: "group name must be set",
},
{
group: &Group{Name: "test"},
expErr: "contain no rules",
},
{
group: &Group{Name: "test",
Rules: []Rule{
@@ -267,7 +263,6 @@ func TestGroup_Validate(t *testing.T) {
},
{
Expr: "sum(up == 0 ) by (host)",
Type: datasource.NewPrometheusType(),
},
},
},
@@ -283,7 +278,6 @@ func TestGroup_Validate(t *testing.T) {
},
{
Expr: "sumSeries(time('foo.bar',10))",
Type: datasource.NewPrometheusType(),
},
},
},
@@ -436,7 +430,7 @@ rules:
`)
})
t.Run("Ok, `for` must change cs", func(t *testing.T) {
t.Run("`for` change", func(t *testing.T) {
f(t, `
name: TestGroup
rules:
@@ -450,5 +444,34 @@ rules:
expr: sum by(job) (up == 1)
`)
})
t.Run("`interval` change", func(t *testing.T) {
f(t, `
name: TestGroup
interval: 2s
rules:
- alert: ExampleAlertWithFor
expr: sum by(job) (up == 1)
`, `
name: TestGroup
interval: 4s
rules:
- alert: ExampleAlertWithFor
expr: sum by(job) (up == 1)
`)
})
t.Run("`concurrency` change", func(t *testing.T) {
f(t, `
name: TestGroup
concurrency: 2
rules:
- alert: ExampleAlertWithFor
expr: sum by(job) (up == 1)
`, `
name: TestGroup
concurrency: 16
rules:
- alert: ExampleAlertWithFor
expr: sum by(job) (up == 1)
`)
})
}

View File

@@ -1,13 +0,0 @@
groups:
- name: TestUpdateGroup
interval: 2s
concurrency: 2
type: prometheus
rules:
- alert: up
expr: up == 0
for: 30s
- alert: up graphite
expr: filterSeries(time('host.1',20),'>','0')
for: 30s
type: graphite

View File

@@ -1,12 +0,0 @@
groups:
- name: TestUpdateGroup
interval: 30s
type: graphite
rules:
- alert: up
expr: filterSeries(time('host.2',20),'>','0')
for: 30s
- alert: up graphite
expr: filterSeries(time('host.1',20),'>','0')
for: 30s
type: graphite

View File

@@ -1,5 +1,7 @@
groups:
- name: duplicatedGroupDiffFiles
labels:
dc: gcp
rules:
- alert: VMRows
for: 5m

View File

@@ -21,10 +21,3 @@ groups:
annotations:
summary: Too high connection number for {{$labels.instance}}
description: "It is {{ $value }} connections for {{$labels.instance}}"
- alert: HostDown
type: graphite
expr: filterSeries(sumSeries(host.receiver.interface.up),'last','=', 0)
for: 3m
annotations:
summary: Too high connection number for {{$labels.instance}}
description: "It is {{ $value }} connections for {{$labels.instance}}"

View File

@@ -0,0 +1,8 @@
groups:
- name: TestEmptyRules
interval: 2s
concurrency: 2
rules:
- name: TestNoRules
type: prometheus

View File

@@ -12,9 +12,12 @@ import (
var (
addr = flag.String("datasource.url", "", "VictoriaMetrics or vmselect url. Required parameter. "+
"E.g. http://127.0.0.1:8428")
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
basicAuthPasswordFile = flag.String("datasource.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -datasource.url")
bearerToken = flag.String("datasource.bearerToken", "", "Optional bearer auth token to use for -datasource.url.")
bearerTokenFile = flag.String("datasource.bearerTokenFile", "", "Optional path to bearer token file to use for -datasource.url.")
tlsInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
tlsCertFile = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
@@ -31,8 +34,15 @@ var (
`In VM "round_digits" limits the number of digits after the decimal point in response values.`)
)
// Param is a single key/value pair that is sent as an HTTP GET
// query parameter.
type Param struct {
	// Key is the query parameter name.
	Key string
	// Value is the query parameter value.
	Value string
}
// Init creates a Querier from provided flag values.
func Init() (QuerierBuilder, error) {
// Provided extraParams will be added as GET params to
// each request.
func Init(extraParams []Param) (QuerierBuilder, error) {
if *addr == "" {
return nil, fmt.Errorf("datasource.url is empty")
}
@@ -42,21 +52,30 @@ func Init() (QuerierBuilder, error) {
return nil, fmt.Errorf("failed to create transport: %w", err)
}
tr.MaxIdleConnsPerHost = *maxIdleConnections
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
var rd string
if *roundDigits > 0 {
rd = fmt.Sprintf("%d", *roundDigits)
extraParams = append(extraParams, Param{
Key: "round_digits",
Value: fmt.Sprintf("%d", *roundDigits),
})
}
authCfg, err := utils.AuthConfig(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile, *bearerToken, *bearerTokenFile)
if err != nil {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
return &VMStorage{
c: &http.Client{Transport: tr},
basicAuthUser: *basicAuthUsername,
basicAuthPass: *basicAuthPassword,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(*addr, "/"),
appendTypePrefix: *appendTypePrefix,
lookBack: *lookBack,
queryStep: *queryStep,
roundDigits: rd,
dataSourceType: NewPrometheusType(),
extraParams: extraParams,
}, nil
}

View File

@@ -7,9 +7,6 @@ import (
"github.com/VictoriaMetrics/metricsql"
)
const graphiteType = "graphite"
const prometheusType = "prometheus"
// Type represents data source type
type Type struct {
name string
@@ -17,12 +14,16 @@ type Type struct {
// NewPrometheusType returns prometheus datasource type
func NewPrometheusType() Type {
return Type{name: prometheusType}
return Type{
name: "prometheus",
}
}
// NewGraphiteType returns graphite datasource type
func NewGraphiteType() Type {
return Type{name: graphiteType}
return Type{
name: "graphite",
}
}
// NewRawType returns datasource type from raw string
@@ -44,19 +45,19 @@ func (t *Type) Set(d Type) {
// String implements String interface with default value.
func (t Type) String() string {
if t.name == "" {
return prometheusType
return "prometheus"
}
return t.name
}
// ValidateExpr validates query expression with datasource ql.
func (t *Type) ValidateExpr(expr string) error {
switch t.name {
case graphiteType:
switch t.String() {
case "graphite":
if _, err := graphiteql.Parse(expr); err != nil {
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
}
case "", prometheusType:
case "prometheus":
if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
}
@@ -72,12 +73,13 @@ func (t *Type) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err := unmarshal(&s); err != nil {
return err
}
if s == "" {
s = "prometheus"
}
switch s {
case "":
s = prometheusType
case graphiteType, prometheusType:
case "graphite", "prometheus":
default:
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, prometheusType, graphiteType)
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, "prometheus", "graphite")
}
t.name = s
return nil

View File

@@ -7,35 +7,37 @@ import (
"net/http"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
// VMStorage represents vmstorage entity with ability to read and write metrics
type VMStorage struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
basicAuthUser string
basicAuthPass string
appendTypePrefix bool
lookBack time.Duration
queryStep time.Duration
roundDigits string
dataSourceType Type
evaluationInterval time.Duration
extraLabels []string
extraParams []Param
disablePathAppend bool
}
// Clone makes clone of VMStorage, shares http client.
func (s *VMStorage) Clone() *VMStorage {
return &VMStorage{
c: s.c,
datasourceURL: s.datasourceURL,
basicAuthUser: s.basicAuthUser,
basicAuthPass: s.basicAuthPass,
lookBack: s.lookBack,
queryStep: s.queryStep,
appendTypePrefix: s.appendTypePrefix,
dataSourceType: s.dataSourceType,
c: s.c,
authCfg: s.authCfg,
datasourceURL: s.datasourceURL,
lookBack: s.lookBack,
queryStep: s.queryStep,
appendTypePrefix: s.appendTypePrefix,
dataSourceType: s.dataSourceType,
disablePathAppend: s.disablePathAppend,
}
}
@@ -57,16 +59,16 @@ func (s *VMStorage) BuildWithParams(params QuerierParams) Querier {
}
// NewVMStorage is a constructor for VMStorage
func NewVMStorage(baseURL, basicAuthUser, basicAuthPass string, lookBack time.Duration, queryStep time.Duration, appendTypePrefix bool, c *http.Client) *VMStorage {
func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Duration, queryStep time.Duration, appendTypePrefix bool, c *http.Client, disablePathAppend bool) *VMStorage {
return &VMStorage{
c: c,
basicAuthUser: basicAuthUser,
basicAuthPass: basicAuthPass,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
lookBack: lookBack,
queryStep: queryStep,
dataSourceType: NewPrometheusType(),
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
lookBack: lookBack,
queryStep: queryStep,
dataSourceType: NewPrometheusType(),
disablePathAppend: disablePathAppend,
}
}
@@ -78,10 +80,10 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
}
ts := time.Now()
switch s.dataSourceType.name {
case "", prometheusType:
switch s.dataSourceType.String() {
case "prometheus":
s.setPrometheusInstantReqParams(req, query, ts)
case graphiteType:
case "graphite":
s.setGraphiteReqParams(req, query, ts)
default:
return nil, fmt.Errorf("engine not found: %q", s.dataSourceType.name)
@@ -96,7 +98,7 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
}()
parseFn := parsePrometheusResponse
if s.dataSourceType.name != prometheusType {
if s.dataSourceType.name != "prometheus" {
parseFn = parseGraphiteResponse
}
return parseFn(req, resp)
@@ -106,7 +108,7 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) ([]Metric, error) {
if s.dataSourceType.name != prometheusType {
if s.dataSourceType.name != "prometheus" {
return nil, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType.name)
}
req, err := s.newRequestPOST()
@@ -133,12 +135,12 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
resp, err := s.c.Do(req.WithContext(ctx))
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", req.URL, err)
return nil, fmt.Errorf("error getting response from %s: %w", req.URL.Redacted(), err)
}
if resp.StatusCode != http.StatusOK {
body, _ := ioutil.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL.Redacted(), body)
}
return resp, nil
}
@@ -149,8 +151,10 @@ func (s *VMStorage) newRequestPOST() (*http.Request, error) {
return nil, err
}
req.Header.Set("Content-Type", "application/json; charset=utf-8")
if s.basicAuthPass != "" {
req.SetBasicAuth(s.basicAuthUser, s.basicAuthPass)
if s.authCfg != nil {
if auth := s.authCfg.GetAuthHeader(); auth != "" {
req.Header.Set("Authorization", auth)
}
}
return req, nil
}

View File

@@ -38,7 +38,7 @@ func (r graphiteResponse) metrics() []Metric {
func parseGraphiteResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL, err)
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
}
return r.metrics(), nil
}

View File

@@ -82,10 +82,10 @@ const (
func parsePrometheusResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL, err)
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL, r.ErrorType, r.Error)
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
@@ -118,7 +118,9 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
if s.appendTypePrefix {
r.URL.Path += prometheusPrefix
}
r.URL.Path += prometheusInstantPath
if !s.disablePathAppend {
r.URL.Path += prometheusInstantPath
}
q := r.URL.Query()
if s.lookBack > 0 {
timestamp = timestamp.Add(-s.lookBack)
@@ -136,7 +138,9 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
if s.appendTypePrefix {
r.URL.Path += prometheusPrefix
}
r.URL.Path += prometheusRangePath
if !s.disablePathAppend {
r.URL.Path += prometheusRangePath
}
q := r.URL.Query()
q.Add("start", fmt.Sprintf("%d", start.Unix()))
q.Add("end", fmt.Sprintf("%d", end.Unix()))
@@ -155,11 +159,11 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
// override step with user-specified value
q.Set("step", s.queryStep.String())
}
if s.roundDigits != "" {
q.Set("round_digits", s.roundDigits)
}
for _, l := range s.extraLabels {
q.Add("extra_label", l)
}
for _, p := range s.extraParams {
q.Add(p.Key, p.Value)
}
r.URL.RawQuery = q.Encode()
}

View File

@@ -10,14 +10,20 @@ import (
"strings"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
var (
ctx = context.Background()
basicAuthName = "foo"
basicAuthPass = "bar"
query = "vm_rows"
queryRender = "constantLine(10)"
baCfg = &promauth.BasicAuthConfig{
Username: basicAuthName,
Password: promauth.NewSecret(basicAuthPass),
}
query = "vm_rows"
queryRender = "constantLine(10)"
)
func TestVMInstantQuery(t *testing.T) {
@@ -73,7 +79,11 @@ func TestVMInstantQuery(t *testing.T) {
srv := httptest.NewServer(mux)
defer srv.Close()
s := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, time.Minute, 0, false, srv.Client())
authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil)
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client(), false)
p := NewPrometheusType()
pq := s.BuildWithParams(QuerierParams{DataSourceType: &p, EvaluationInterval: 15 * time.Second})
@@ -179,12 +189,16 @@ func TestVMRangeQuery(t *testing.T) {
srv := httptest.NewServer(mux)
defer srv.Close()
s := NewVMStorage(srv.URL, basicAuthName, basicAuthPass, time.Minute, 0, false, srv.Client())
authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil)
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client(), false)
p := NewPrometheusType()
pq := s.BuildWithParams(QuerierParams{DataSourceType: &p, EvaluationInterval: 15 * time.Second})
_, err := pq.QueryRange(ctx, query, time.Now(), time.Time{})
_, err = pq.QueryRange(ctx, query, time.Now(), time.Time{})
expectError(t, err, "is missing")
_, err = pq.QueryRange(ctx, query, time.Time{}, time.Now())
@@ -216,6 +230,10 @@ func TestVMRangeQuery(t *testing.T) {
}
func TestRequestParams(t *testing.T) {
authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil)
if err != nil {
t.Fatalf("unexpected: %s", err)
}
query := "up"
timestamp := time.Date(2001, 2, 3, 4, 5, 6, 0, time.UTC)
testCases := []struct {
@@ -234,6 +252,17 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, prometheusInstantPath, r.URL.Path)
},
},
{
"prometheus path with disablePathAppend",
false,
&VMStorage{
dataSourceType: NewPrometheusType(),
disablePathAppend: true,
},
func(t *testing.T, r *http.Request) {
checkEqualString(t, "", r.URL.Path)
},
},
{
"prometheus prefix",
false,
@@ -245,6 +274,18 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, prometheusPrefix+prometheusInstantPath, r.URL.Path)
},
},
{
"prometheus prefix with disablePathAppend",
false,
&VMStorage{
dataSourceType: NewPrometheusType(),
appendTypePrefix: true,
disablePathAppend: true,
},
func(t *testing.T, r *http.Request) {
checkEqualString(t, prometheusPrefix, r.URL.Path)
},
},
{
"prometheus range path",
true,
@@ -255,6 +296,17 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, prometheusRangePath, r.URL.Path)
},
},
{
"prometheus range path with disablePathAppend",
true,
&VMStorage{
dataSourceType: NewPrometheusType(),
disablePathAppend: true,
},
func(t *testing.T, r *http.Request) {
checkEqualString(t, "", r.URL.Path)
},
},
{
"prometheus range prefix",
true,
@@ -266,6 +318,18 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, prometheusPrefix+prometheusRangePath, r.URL.Path)
},
},
{
"prometheus range prefix with disablePathAppend",
true,
&VMStorage{
dataSourceType: NewPrometheusType(),
appendTypePrefix: true,
disablePathAppend: true,
},
func(t *testing.T, r *http.Request) {
checkEqualString(t, prometheusPrefix, r.URL.Path)
},
},
{
"graphite path",
false,
@@ -308,10 +372,7 @@ func TestRequestParams(t *testing.T) {
{
"basic auth",
false,
&VMStorage{
basicAuthUser: "foo",
basicAuthPass: "bar",
},
&VMStorage{authCfg: authCfg},
func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@@ -321,10 +382,7 @@ func TestRequestParams(t *testing.T) {
{
"basic auth range",
true,
&VMStorage{
basicAuthUser: "foo",
basicAuthPass: "bar",
},
&VMStorage{authCfg: authCfg},
func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@@ -385,7 +443,7 @@ func TestRequestParams(t *testing.T) {
"round digits",
false,
&VMStorage{
roundDigits: "10",
extraParams: []Param{{"round_digits", "10"}},
},
func(t *testing.T, r *http.Request) {
exp := fmt.Sprintf("query=%s&round_digits=10&time=%d", query, timestamp.Unix())
@@ -421,6 +479,20 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, exp, r.URL.RawQuery)
},
},
{
"extra params",
false,
&VMStorage{
extraParams: []Param{
{Key: "nocache", Value: "1"},
{Key: "max_lookback", Value: "1h"},
},
},
func(t *testing.T, r *http.Request) {
exp := fmt.Sprintf("max_lookback=1h&nocache=1&query=%s&time=%d", query, timestamp.Unix())
checkEqualString(t, exp, r.URL.RawQuery)
},
},
}
for _, tc := range testCases {
@@ -429,14 +501,14 @@ func TestRequestParams(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
switch tc.vm.dataSourceType.name {
case "", prometheusType:
switch tc.vm.dataSourceType.String() {
case "prometheus":
if tc.queryRange {
tc.vm.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
} else {
tc.vm.setPrometheusInstantReqParams(req, query, timestamp)
}
case graphiteType:
case "graphite":
tc.vm.setGraphiteReqParams(req, query, timestamp)
}
tc.checkFn(t, req)

View File

@@ -18,15 +18,17 @@ import (
// Group is an entity for grouping rules
type Group struct {
mu sync.RWMutex
Name string
File string
Rules []Rule
Type datasource.Type
Interval time.Duration
Concurrency int
Checksum string
mu sync.RWMutex
Name string
File string
Rules []Rule
Type datasource.Type
Interval time.Duration
Concurrency int
Checksum string
ExtraFilterLabels map[string]string
Labels map[string]string
doneCh chan struct{}
finishedCh chan struct{}
@@ -50,6 +52,23 @@ func newGroupMetrics(name, file string) *groupMetrics {
return m
}
// mergeLabels returns a new map holding the union of set1 and set2.
// When both sets contain the same key, the value from set2 wins and
// the override is logged together with groupName and ruleName.
// Neither input map is modified; nil inputs are treated as empty.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
	r := make(map[string]string, len(set1)+len(set2))
	for k, v := range set1 {
		r[k] = v
	}
	for k, v := range set2 {
		if prevV, ok := r[k]; ok {
			// set2 is not necessarily the external label set (callers also pass
			// group and rule labels here), so the message stays source-neutral.
			logger.Infof("label %q=%q for rule %q.%q overwritten with label %q=%q",
				k, prevV, groupName, ruleName, k, v)
		}
		r[k] = v
	}
	return r
}
func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
g := &Group{
Type: cfg.Type,
@@ -59,6 +78,7 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Concurrency: cfg.Concurrency,
Checksum: cfg.Checksum,
ExtraFilterLabels: cfg.ExtraFilterLabels,
Labels: cfg.Labels,
doneCh: make(chan struct{}),
finishedCh: make(chan struct{}),
@@ -73,17 +93,20 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
}
rules := make([]Rule, len(cfg.Rules))
for i, r := range cfg.Rules {
// override rule labels with external labels
for k, v := range labels {
if prevV, ok := r.Labels[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
k, prevV, g.Name, r.Name(), k, v)
}
if r.Labels == nil {
r.Labels = map[string]string{}
}
r.Labels[k] = v
var extraLabels map[string]string
// apply external labels
if len(labels) > 0 {
extraLabels = labels
}
// apply group labels, it has priority on external labels
if len(cfg.Labels) > 0 {
extraLabels = mergeLabels(g.Name, r.Name(), extraLabels, g.Labels)
}
// apply rules labels, it has priority on other labels
if len(extraLabels) > 0 {
r.Labels = mergeLabels(g.Name, r.Name(), extraLabels, r.Labels)
}
rules[i] = g.newRule(qb, r)
}
g.Rules = rules
@@ -100,6 +123,9 @@ func (g *Group) newRule(qb datasource.QuerierBuilder, rule config.Rule) Rule {
// ID return unique group ID that consists of
// rules file and group name
func (g *Group) ID() uint64 {
g.mu.RLock()
defer g.mu.RUnlock()
hash := fnv.New64a()
hash.Write([]byte(g.File))
hash.Write([]byte("\xff"))
@@ -110,6 +136,7 @@ func (g *Group) ID() uint64 {
// Restore restores alerts state for group rules
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, lookback time.Duration, labels map[string]string) error {
labels = mergeLabels(g.Name, "", labels, g.Labels)
for _, rule := range g.Rules {
rr, ok := rule.(*AlertingRule)
if !ok {
@@ -166,20 +193,18 @@ func (g *Group) updateWith(newGroup *Group) error {
for _, nr := range rulesRegistry {
newRules = append(newRules, nr)
}
// note that g.Interval is not updated here
// so the value can be compared later in
// group.Start function
g.Type = newGroup.Type
g.Concurrency = newGroup.Concurrency
g.ExtraFilterLabels = newGroup.ExtraFilterLabels
g.Labels = newGroup.Labels
g.Checksum = newGroup.Checksum
g.Rules = newRules
return nil
}
var (
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
)
func (g *Group) close() {
if g.doneCh == nil {
return
@@ -201,7 +226,7 @@ func (g *Group) start(ctx context.Context, nts []notifier.Notifier, rw *remotewr
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
if !skipRandSleepOnGroupStart {
randSleep := uint64(float64(g.Interval) * (float64(uint32(g.ID())) / (1 << 32)))
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
if randSleep < sleepOffset {
randSleep += uint64(g.Interval)
@@ -220,7 +245,16 @@ func (g *Group) start(ctx context.Context, nts []notifier.Notifier, rw *remotewr
}
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
e := &executor{nts, rw}
e := &executor{rw: rw}
for _, nt := range nts {
ent := eNotifier{
Notifier: nt,
alertsSent: getOrCreateCounter(fmt.Sprintf("vmalert_alerts_sent_total{addr=%q}", nt.Addr())),
alertsSendErrors: getOrCreateCounter(fmt.Sprintf("vmalert_alerts_send_errors_total{addr=%q}", nt.Addr())),
}
e.notifiers = append(e.notifiers, ent)
}
t := time.NewTicker(g.Interval)
defer t.Stop()
for {
@@ -249,30 +283,48 @@ func (g *Group) start(ctx context.Context, nts []notifier.Notifier, rw *remotewr
case <-t.C:
g.metrics.iterationTotal.Inc()
iterationStart := time.Now()
errs := e.execConcurrently(ctx, g.Rules, g.Concurrency, g.Interval)
for err := range errs {
if err != nil {
logger.Errorf("group %q: %s", g.Name, err)
if len(g.Rules) > 0 {
resolveDuration := getResolveDuration(g.Interval)
errs := e.execConcurrently(ctx, g.Rules, g.Concurrency, resolveDuration)
for err := range errs {
if err != nil {
logger.Errorf("group %q: %s", g.Name, err)
}
}
}
g.metrics.iterationDuration.UpdateDuration(iterationStart)
}
}
}
// getResolveDuration returns the duration after which a firing alert
// should expire on the notifier side if vmalert stops sending updates.
// It equals three evaluation intervals of the group, unless a positive
// maxResolveDuration is configured and is smaller than that value.
func getResolveDuration(groupInterval time.Duration) time.Duration {
	limit := *maxResolveDuration
	resolveDuration := 3 * groupInterval
	if limit <= 0 || resolveDuration <= limit {
		return resolveDuration
	}
	return limit
}
type executor struct {
notifiers []notifier.Notifier
notifiers []eNotifier
rw *remotewrite.Client
}
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurrency int, interval time.Duration) chan error {
// eNotifier couples a notifier.Notifier with the counters that track
// delivery statistics for that particular notifier.
type eNotifier struct {
notifier.Notifier
// alertsSent is increased by the number of alerts passed to every Send call.
alertsSent *counter
// alertsSendErrors is incremented each time Send returns an error.
alertsSendErrors *counter
}
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurrency int, resolveDuration time.Duration) chan error {
res := make(chan error, len(rules))
if concurrency == 1 {
// fast path
for _, rule := range rules {
res <- e.exec(ctx, rule, interval)
res <- e.exec(ctx, rule, resolveDuration)
}
close(res)
return res
@@ -285,7 +337,7 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurren
sem <- struct{}{}
wg.Add(1)
go func(r Rule) {
res <- e.exec(ctx, r, interval)
res <- e.exec(ctx, r, resolveDuration)
<-sem
wg.Done()
}(rule)
@@ -297,19 +349,16 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurren
}
var (
execTotal = metrics.NewCounter(`vmalert_execution_total`)
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
execTotal = metrics.NewCounter(`vmalert_execution_total`)
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
)
func (e *executor) exec(ctx context.Context, rule Rule, interval time.Duration) error {
func (e *executor) exec(ctx context.Context, rule Rule, resolveDuration time.Duration) error {
execTotal.Inc()
execStart := time.Now()
defer func() {
execDuration.UpdateDuration(execStart)
}()
tss, err := rule.Exec(ctx)
if err != nil {
@@ -334,10 +383,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, interval time.Duration)
for _, a := range ar.alerts {
switch a.State {
case notifier.StateFiring:
// set End to execStart + 3 intervals
// so notifier can resolve it automatically if `vmalert`
// won't be able to send resolve for some reason
a.End = time.Now().Add(3 * interval)
a.End = time.Now().Add(resolveDuration)
alerts = append(alerts, *a)
case notifier.StateInactive:
// set End to execStart to notify
@@ -350,11 +396,11 @@ func (e *executor) exec(ctx context.Context, rule Rule, interval time.Duration)
return nil
}
alertsSent.Add(len(alerts))
errGr := new(utils.ErrGroup)
for _, nt := range e.notifiers {
nt.alertsSent.Add(len(alerts))
if err := nt.Send(ctx, alerts); err != nil {
alertsSendErrors.Inc()
nt.alertsSendErrors.Inc()
errGr.Add(fmt.Errorf("rule %q: failed to send alerts: %w", rule, err))
}
}

View File

@@ -2,12 +2,12 @@ package main
import (
"context"
"fmt"
"sort"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
)
@@ -107,17 +107,6 @@ func TestUpdateWith(t *testing.T) {
{Record: "foo5"},
},
},
{
"update datasource type",
[]config.Rule{
{Alert: "foo1", Type: datasource.NewPrometheusType()},
{Alert: "foo3", Type: datasource.NewGraphiteType()},
},
[]config.Rule{
{Alert: "foo1", Type: datasource.NewGraphiteType()},
{Alert: "foo10", Type: datasource.NewPrometheusType()},
},
},
}
for _, tc := range testCases {
@@ -191,7 +180,14 @@ func TestGroupStart(t *testing.T) {
// add rule labels - see config/testdata/rules1-good.rules
alert1.Labels["label"] = "bar"
alert1.Labels["host"] = inst1
alert1.ID = hash(m1)
// add service labels
alert1.Labels[alertNameLabel] = alert1.Name
alert1.Labels[alertGroupNameLabel] = g.Name
var labels1 []string
for k, v := range alert1.Labels {
labels1 = append(labels1, k, v)
}
alert1.ID = hash(metricWithLabels(t, labels1...))
alert2, err := r.newAlert(m2, time.Now(), nil)
if err != nil {
@@ -203,7 +199,14 @@ func TestGroupStart(t *testing.T) {
// add rule labels - see config/testdata/rules1-good.rules
alert2.Labels["label"] = "bar"
alert2.Labels["host"] = inst2
alert2.ID = hash(m2)
// add service labels
alert2.Labels[alertNameLabel] = alert2.Name
alert2.Labels[alertGroupNameLabel] = g.Name
var labels2 []string
for k, v := range alert2.Labels {
labels2 = append(labels2, k, v)
}
alert2.ID = hash(metricWithLabels(t, labels2...))
finished := make(chan struct{})
fs.add(m1)
@@ -235,3 +238,27 @@ func TestGroupStart(t *testing.T) {
g.close()
<-finished
}
// TestResolveDuration verifies that getResolveDuration returns three group
// intervals unless a positive maxResolveDuration caps the result.
func TestResolveDuration(t *testing.T) {
	// maxResolveDuration is a global flag value, so put it back afterwards.
	origMaxResolveDuration := *maxResolveDuration
	defer func() { *maxResolveDuration = origMaxResolveDuration }()

	type testCase struct {
		groupInterval time.Duration
		maxDuration   time.Duration
		expected      time.Duration
	}
	cases := []testCase{
		{groupInterval: time.Minute, maxDuration: 0, expected: 3 * time.Minute},
		{groupInterval: 3 * time.Minute, maxDuration: 0, expected: 9 * time.Minute},
		{groupInterval: time.Minute, maxDuration: 2 * time.Minute, expected: 2 * time.Minute},
		{groupInterval: 0, maxDuration: 0, expected: 0},
	}

	for i := range cases {
		c := cases[i]
		name := fmt.Sprintf("%v-%v-%v", c.groupInterval, c.expected, c.maxDuration)
		t.Run(name, func(t *testing.T) {
			*maxResolveDuration = c.maxDuration
			if got := getResolveDuration(c.groupInterval); got != c.expected {
				t.Errorf("expected to have %v; got %v", c.expected, got)
			}
		})
	}
}

View File

@@ -63,6 +63,7 @@ type fakeNotifier struct {
alerts []notifier.Alert
}
// Addr always returns an empty string; it provides the Addr method
// required by the notifier.Notifier interface for this test double.
func (*fakeNotifier) Addr() string { return "" }
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert) error {
fn.Lock()
defer fn.Unlock()
@@ -204,7 +205,8 @@ func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
}*/
}
if len(expTS.Labels) != len(gotTS.Labels) {
return fmt.Errorf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
return fmt.Errorf("expected number of labels %d (%v); got %d (%v)",
len(expTS.Labels), expTS.Labels, len(gotTS.Labels), gotTS.Labels)
}
for i, exp := range expTS.Labels {
got := gotTS.Labels[i]

View File

@@ -42,6 +42,8 @@ Rule files may contain %{ENV_VAR} placeholders, which are substituted by the cor
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
"which is by default equal to 3 evaluation intervals of the parent group.")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
@@ -52,9 +54,13 @@ eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup.")
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's name as label to generated alerts and time series.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The `-rule` flag must be specified.")
)
var alertURLGeneratorFn notifier.AlertURLGenerator
func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
flag.CommandLine.SetOutput(os.Stdout)
@@ -75,21 +81,31 @@ func main() {
}
return
}
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
if err != nil {
logger.Fatalf("failed to init `external.url`: %s", err)
}
notifier.InitTemplateFunc(eu)
alertURLGeneratorFn, err = getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err)
}
if *replayFrom != "" || *replayTo != "" {
rw, err := remotewrite.Init(context.Background())
if err != nil {
logger.Fatalf("failed to init remoteWrite: %s", err)
}
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
if err != nil {
logger.Fatalf("failed to init `external.url`: %s", err)
}
notifier.InitTemplateFunc(eu)
groupsCfg, err := config.Parse(*rulePath, *validateTemplates, *validateExpressions)
if err != nil {
logger.Fatalf("cannot parse configuration file: %s", err)
}
q, err := datasource.Init()
// prevent queries from caching and boundaries aligning
// when querying VictoriaMetrics datasource.
noCache := datasource.Param{Key: "nocache", Value: "1"}
q, err := datasource.Init([]datasource.Param{noCache})
if err != nil {
logger.Fatalf("failed to init datasource: %s", err)
}
@@ -111,11 +127,16 @@ func main() {
logger.Fatalf("cannot parse configuration file: %s", err)
}
// Register SIGHUP handler for config re-read just before manager.start call.
// This guarantees that the config will be re-read if the signal arrives during manager.start call.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240
sighupCh := procutil.NewSighupChan()
if err := manager.start(ctx, groupsCfg); err != nil {
logger.Fatalf("failed to start: %s", err)
}
go configReload(ctx, manager, groupsCfg)
go configReload(ctx, manager, groupsCfg, sighupCh)
rh := &requestHandler{m: manager}
go httpserver.Serve(*httpListenAddr, rh.handler)
@@ -137,24 +158,14 @@ var (
)
func newManager(ctx context.Context) (*manager, error) {
q, err := datasource.Init()
q, err := datasource.Init(nil)
if err != nil {
return nil, fmt.Errorf("failed to init datasource: %w", err)
}
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
if err != nil {
return nil, fmt.Errorf("failed to init `external.url`: %w", err)
}
notifier.InitTemplateFunc(eu)
aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
if err != nil {
return nil, fmt.Errorf("failed to init `external.alert.source`: %w", err)
}
nts, err := notifier.Init(aug)
nts, err := notifier.Init(alertURLGeneratorFn)
if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err)
}
manager := &manager{
groups: make(map[uint64]*Group),
querierBuilder: q,
@@ -239,12 +250,7 @@ See the docs at https://docs.victoriametrics.com/vmalert.html .
flagutil.Usage(s)
}
func configReload(ctx context.Context, m *manager, groupsCfg []config.Group) {
// Register SIGHUP handler for config re-read just before manager.start call.
// This guarantees that the config will be re-read if the signal arrives during manager.start call.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240
sighupCh := procutil.NewSighupChan()
func configReload(ctx context.Context, m *manager, groupsCfg []config.Group, sighupCh <-chan os.Signal) {
var configCheckCh <-chan time.Time
if *rulesCheckInterval > 0 {
ticker := time.NewTicker(*rulesCheckInterval)
@@ -272,6 +278,9 @@ func configReload(ctx context.Context, m *manager, groupsCfg []config.Group) {
continue
}
if configsEqual(newGroupsCfg, groupsCfg) {
// set success to 1 since previous reload
// could have been unsuccessful
configSuccess.Set(1)
// config didn't change - skip it
continue
}

View File

@@ -94,14 +94,19 @@ groups:
*rulesCheckInterval = 200 * time.Millisecond
*rulePath = []string{f.Name()}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
m := &manager{
querierBuilder: &fakeQuerier{},
groups: make(map[uint64]*Group),
labels: map[string]string{},
}
go configReload(ctx, m, nil)
syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan()
go func() {
configReload(ctx, m, nil, sighupCh)
close(syncCh)
}()
lenLocked := func(m *manager) int {
m.groupsMu.RLock()
@@ -138,6 +143,9 @@ groups:
if groupsLen != 1 { // should remain unchanged
t.Fatalf("expected to have exactly 1 group loaded; got %d", groupsLen)
}
cancel()
<-syncCh
}
func writeToFile(t *testing.T, file, b string) {

View File

@@ -148,6 +148,7 @@ func (g *Group) toAPI() APIGroup {
Interval: g.Interval.String(),
Concurrency: g.Concurrency,
ExtraFilterLabels: g.ExtraFilterLabels,
Labels: g.Labels,
}
for _, r := range g.Rules {
switch v := r.(type) {

View File

@@ -113,18 +113,6 @@ func TestManagerUpdate(t *testing.T) {
Name: "ExampleAlertAlwaysFiring",
Expr: "sum by(job) (up == 1)",
}
ExampleAlertGraphite = &AlertingRule{
Name: "up graphite",
Expr: "filterSeries(time('host.1',20),'>','0')",
Type: datasource.NewGraphiteType(),
For: defaultEvalInterval,
}
ExampleAlertGraphite2 = &AlertingRule{
Name: "up",
Expr: "filterSeries(time('host.2',20),'>','0')",
Type: datasource.NewGraphiteType(),
For: defaultEvalInterval,
}
)
testCases := []struct {
@@ -148,7 +136,7 @@ func TestManagerUpdate(t *testing.T) {
Name: "VMRows",
Expr: "vm_rows > 0",
For: 5 * time.Minute,
Labels: map[string]string{"label": "bar"},
Labels: map[string]string{"dc": "gcp", "label": "bar"},
Annotations: map[string]string{
"summary": "{{ $value }}",
"description": "{{$labels}}",
@@ -226,23 +214,6 @@ func TestManagerUpdate(t *testing.T) {
},
},
},
{
name: "update prometheus to graphite type",
initPath: "config/testdata/dir/rules-update0-good.rules",
updatePath: "config/testdata/dir/rules-update1-good.rules",
want: []*Group{
{
File: "config/testdata/dir/rules-update1-good.rules",
Interval: defaultEvalInterval,
Type: datasource.NewGraphiteType(),
Name: "TestUpdateGroup",
Rules: []Rule{
ExampleAlertGraphite2,
ExampleAlertGraphite,
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {

View File

@@ -14,17 +14,28 @@ import (
// Alert the triggered alert
// TODO: Looks like alert name isn't unique
type Alert struct {
GroupID uint64
Name string
Labels map[string]string
// GroupID contains the ID of the parent rules group
GroupID uint64
// Name represents Alert name
Name string
// Labels is the list of label-value pairs attached to the Alert
Labels map[string]string
// Annotations is the list of annotations generated on Alert evaluation
Annotations map[string]string
State AlertState
Expr string
// State represents the current state of the Alert
State AlertState
// Expr contains expression that was executed to generate the Alert
Expr string
// Start defines the moment of time when Alert has triggered
Start time.Time
End time.Time
// End defines the moment of time when Alert is supposed to expire
End time.Time
// Value stores the value returned from evaluating expression from Expr field
Value float64
ID uint64
// ID is the unique identifier for the Alert
ID uint64
// Restored is true if Alert was restored after restart
Restored bool
}
// AlertState type indicates the Alert state

View File

@@ -12,6 +12,7 @@ import (
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
addr string
alertURL string
basicAuthUser string
basicAuthPass string
@@ -19,6 +20,9 @@ type AlertManager struct {
client *http.Client
}
// Addr returns the address where alerts are sent, exactly as it was
// passed to NewAlertManager.
//
// A pointer receiver is used to stay consistent with Send, so the struct
// is not copied on every call and the method set does not mix receiver kinds.
func (am *AlertManager) Addr() string { return am.addr }
// Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
b := &bytes.Buffer{}
@@ -57,9 +61,10 @@ const alertManagerPath = "/api/v2/alerts"
// NewAlertManager is a constructor for AlertManager
func NewAlertManager(alertManagerURL, user, pass string, fn AlertURLGenerator, c *http.Client) *AlertManager {
addr := strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath
url := strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath
return &AlertManager{
alertURL: addr,
addr: alertManagerURL,
alertURL: url,
argFunc: fn,
client: c,
basicAuthUser: user,

View File

@@ -10,6 +10,14 @@ import (
"time"
)
// TestAlertManager_Addr checks that Addr reports the URL passed to
// NewAlertManager without any modification.
func TestAlertManager_Addr(t *testing.T) {
	const addr = "http://localhost"
	got := NewAlertManager(addr, "", "", nil, nil).Addr()
	if got != addr {
		t.Errorf("expected to have %q; got %q", addr, got)
	}
}
func TestAlertManager_Send(t *testing.T) {
const baUser, baPass = "foo", "bar"
mux := http.NewServeMux()

View File

@@ -2,7 +2,12 @@ package notifier
import "context"
// Notifier is common interface for alert manager provider
// Notifier is a common interface for alert manager provider
type Notifier interface {
// Send sends the given list of alerts.
// Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert) error
// Addr returns address where alerts are sent.
Addr() string
}

View File

@@ -60,7 +60,7 @@ func (rr *RecordingRule) ID() uint64 {
func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
rr := &RecordingRule{
Type: cfg.Type,
Type: group.Type,
RuleID: cfg.ID,
Name: cfg.Record,
Expr: cfg.Expr,
@@ -68,7 +68,7 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
GroupID: group.ID(),
metrics: &recordingRuleMetrics{},
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: &cfg.Type,
DataSourceType: &group.Type,
EvaluationInterval: group.Interval,
ExtraLabels: group.ExtraFilterLabels,
}),

View File

@@ -12,9 +12,13 @@ import (
var (
addr = flag.String("remoteRead.url", "", "Optional URL to VictoriaMetrics or vmselect that will be used to restore alerts "+
"state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"E.g. http://127.0.0.1:8428")
"E.g. http://127.0.0.1:8428. See also -remoteRead.disablePathAppend")
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
basicAuthPasswordFile = flag.String("remoteRead.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteRead.url")
bearerToken = flag.String("remoteRead.bearerToken", "", "Optional bearer auth token to use for -remoteRead.url.")
bearerTokenFile = flag.String("remoteRead.bearerTokenFile", "", "Optional path to bearer token file to use for -remoteRead.url.")
tlsInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
tlsCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
tlsKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
@@ -22,6 +26,7 @@ var (
"By default system CA is used")
tlsServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
"By default the server name from -remoteRead.url is used")
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' path to the configured -remoteRead.url.")
)
// Init creates a Querier from provided flag values.
@@ -34,6 +39,10 @@ func Init() (datasource.QuerierBuilder, error) {
if err != nil {
return nil, fmt.Errorf("failed to create transport: %w", err)
}
authCfg, err := utils.AuthConfig(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile, *bearerToken, *bearerTokenFile)
if err != nil {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
c := &http.Client{Transport: tr}
return datasource.NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, 0, 0, false, c), nil
return datasource.NewVMStorage(*addr, authCfg, 0, 0, false, c, *disablePathAppend), nil
}

View File

@@ -11,9 +11,13 @@ import (
var (
addr = flag.String("remoteWrite.url", "", "Optional URL to VictoriaMetrics or vminsert where to persist alerts state "+
"and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
"and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, "+
"then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend")
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
basicAuthPasswordFile = flag.String("remoteWrite.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteWrite.url")
bearerToken = flag.String("remoteWrite.bearerToken", "", "Optional bearer auth token to use for -remoteWrite.url.")
bearerTokenFile = flag.String("remoteWrite.bearerTokenFile", "", "Optional path to bearer token file to use for -remoteWrite.url.")
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
@@ -27,6 +31,7 @@ var (
"By default system CA is used")
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
"By default the server name from -remoteWrite.url is used")
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
)
// Init creates Client object from given flags.
@@ -41,14 +46,19 @@ func Init(ctx context.Context) (*Client, error) {
return nil, fmt.Errorf("failed to create transport: %w", err)
}
authCfg, err := utils.AuthConfig(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile, *bearerToken, *bearerTokenFile)
if err != nil {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
return NewClient(ctx, Config{
Addr: *addr,
Concurrency: *concurrency,
MaxQueueSize: *maxQueueSize,
MaxBatchSize: *maxBatchSize,
FlushInterval: *flushInterval,
BasicAuthUser: *basicAuthUsername,
BasicAuthPass: *basicAuthPassword,
Transport: t,
Addr: *addr,
AuthCfg: authCfg,
Concurrency: *concurrency,
MaxQueueSize: *maxQueueSize,
MaxBatchSize: *maxBatchSize,
FlushInterval: *flushInterval,
DisablePathAppend: *disablePathAppend,
Transport: t,
})
}

View File

@@ -6,26 +6,30 @@ import (
"fmt"
"io/ioutil"
"net/http"
"path"
"strings"
"sync"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
// Client is an asynchronous HTTP client for writing
// timeseries via remote write protocol.
type Client struct {
addr string
c *http.Client
input chan prompbmarshal.TimeSeries
baUser, baPass string
flushInterval time.Duration
maxBatchSize int
maxQueueSize int
addr string
c *http.Client
authCfg *promauth.Config
input chan prompbmarshal.TimeSeries
flushInterval time.Duration
maxBatchSize int
maxQueueSize int
disablePathAppend bool
wg sync.WaitGroup
doneCh chan struct{}
@@ -34,10 +38,8 @@ type Client struct {
// Config is config for remote write.
type Config struct {
// Addr of remote storage
Addr string
BasicAuthUser string
BasicAuthPass string
Addr string
AuthCfg *promauth.Config
// Concurrency defines number of readers that
// concurrently read from the queue and flush data
@@ -56,6 +58,8 @@ type Config struct {
WriteTimeout time.Duration
// Transport will be used by the underlying http.Client
Transport *http.Transport
// DisablePathAppend can be used to not automatically append '/api/v1/write' to the remote write url
DisablePathAppend bool
}
const (
@@ -89,24 +93,25 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.Transport == nil {
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
}
cc := defaultConcurrency
if cfg.Concurrency > 0 {
cc = cfg.Concurrency
}
c := &Client{
c: &http.Client{
Timeout: cfg.WriteTimeout,
Transport: cfg.Transport,
},
addr: strings.TrimSuffix(cfg.Addr, "/"),
baUser: cfg.BasicAuthUser,
baPass: cfg.BasicAuthPass,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,
maxQueueSize: cfg.MaxQueueSize,
doneCh: make(chan struct{}),
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
}
cc := defaultConcurrency
if cfg.Concurrency > 0 {
cc = cfg.Concurrency
addr: strings.TrimSuffix(cfg.Addr, "/"),
authCfg: cfg.AuthCfg,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,
maxQueueSize: cfg.MaxQueueSize,
doneCh: make(chan struct{}),
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
disablePathAppend: cfg.DisablePathAppend,
}
for i := 0; i < cc; i++ {
c.run(ctx)
}
@@ -179,10 +184,11 @@ func (c *Client) run(ctx context.Context) {
}
var (
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
)
// flush is a blocking function that marshals WriteRequest and sends
@@ -193,6 +199,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
return
}
defer prompbmarshal.ResetWriteRequest(wr)
defer bufferFlushDuration.UpdateDuration(time.Now())
data, err := wr.Marshal()
if err != nil {
@@ -228,20 +235,24 @@ func (c *Client) send(ctx context.Context, data []byte) error {
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
if c.baPass != "" {
req.SetBasicAuth(c.baUser, c.baPass)
if c.authCfg != nil {
if auth := c.authCfg.GetAuthHeader(); auth != "" {
req.Header.Set("Authorization", auth)
}
}
if !c.disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, writePath)
}
req.URL.Path += writePath
resp, err := c.c.Do(req.WithContext(ctx))
if err != nil {
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
req.URL, err, len(data), r.Size())
req.URL.Redacted(), err, len(data), r.Size())
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusNoContent {
if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
body, _ := ioutil.ReadAll(resp.Body)
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL, body)
resp.StatusCode, req.URL.Redacted(), body)
}
return nil
}

View File

@@ -0,0 +1,36 @@
{% func Footer() %}
        </main>
        <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
        <script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
        <script type="text/javascript">
            // Expands every collapsible section on the page.
            function expandAll() {
                $('.collapse').addClass('show');
            }

            // Collapses every collapsible section on the page.
            function collapseAll() {
                $('.collapse').removeClass('show');
            }

            $(document).ready(function() {
                // prevent collapse logic on link click
                $(".group-heading a").click(function(e) {
                    e.stopPropagation();
                });

                // toggle the section referenced by the heading's data-bs-target
                $(".group-heading").click(function(e) {
                    let target = $(this).attr('data-bs-target');
                    let el = $('#'+target);
                    new bootstrap.Collapse(el, {
                        toggle: true
                    });
                });

                // Open the group referenced by the URL fragment, if any.
                // jQuery 3 throws on the bare selector '#', so an empty fragment
                // must be skipped. The lookup goes through getElementById so that
                // the fragment is treated as an id and never parsed as a selector.
                var hash = window.location.hash.slice(1);
                if (hash === '') {
                    return;
                }
                let group = $(document.getElementById(hash));
                if (group.length > 0) {
                    group.click();
                }
            });
        </script>
    </body>
</html>
{% endfunc %}

View File

@@ -0,0 +1,86 @@
// Code generated by qtc from "footer.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/tpl/footer.qtpl:1
package tpl
//line app/vmalert/tpl/footer.qtpl:1
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/tpl/footer.qtpl:1
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/tpl/footer.qtpl:1
func StreamFooter(qw422016 *qt422016.Writer) {
//line app/vmalert/tpl/footer.qtpl:1
qw422016.N().S(`
</main>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
<script type="text/javascript">
function expandAll() {
$('.collapse').addClass('show');
}
function collapseAll() {
$('.collapse').removeClass('show');
}
$(document).ready(function() {
// prevent collapse logic on link click
$(".group-heading a").click(function(e) {
e.stopPropagation();
});
$(".group-heading").click(function(e) {
let target = $(this).attr('data-bs-target');
let el = $('#'+target);
new bootstrap.Collapse(el, {
toggle: true
});
});
var hash = window.location.hash.substr(1);
let group = $('#'+hash);
if (group.length > 0) {
group.click();
}
});
</script>
</body>
</html>
`)
//line app/vmalert/tpl/footer.qtpl:36
}
//line app/vmalert/tpl/footer.qtpl:36
func WriteFooter(qq422016 qtio422016.Writer) {
//line app/vmalert/tpl/footer.qtpl:36
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/tpl/footer.qtpl:36
StreamFooter(qw422016)
//line app/vmalert/tpl/footer.qtpl:36
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/tpl/footer.qtpl:36
}
//line app/vmalert/tpl/footer.qtpl:36
func Footer() string {
//line app/vmalert/tpl/footer.qtpl:36
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/tpl/footer.qtpl:36
WriteFooter(qb422016)
//line app/vmalert/tpl/footer.qtpl:36
qs422016 := string(qb422016.B)
//line app/vmalert/tpl/footer.qtpl:36
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/tpl/footer.qtpl:36
return qs422016
//line app/vmalert/tpl/footer.qtpl:36
}

View File

@@ -0,0 +1,43 @@
Header renders the opening part of every page: the doctype, the head section
with the page title ("vmalert", followed by " - title" when title is non-empty),
the Bootstrap stylesheet and the inline styles, then the navigation bar built by
PrintNavItems(title, pages) and the opening main tag.
The main, body and html tags opened here are closed by Footer.
{% func Header(title string, pages []NavItem) %}
<!DOCTYPE html>
<html lang="en">
<head>
<title>vmalert{% if title != "" %} - {%s title %}{% endif %}</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<style>
body{
min-height: 75rem;
padding-top: 4.5rem;
}
pre {
overflow: scroll;
max-width: 600px;
min-height: 30px;
}
.group-heading {
cursor: pointer;
padding: 5px;
margin-top: 5px;
position: relative;
}
.group-heading .anchor {
position:absolute;
top:-60px;
}
.group-heading span {
float: right;
margin-left: 5px;
margin-right: 5px;
}
.group-heading:hover {
background-color: #f8f9fa!important;
}
.table .error-cell{
word-break: break-word;
}
</style>
</head>
<body>
{%= PrintNavItems(title, pages) %}
<main class="px-2">
{% endfunc %}

View File

@@ -0,0 +1,107 @@
// Code generated by qtc from "header.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/tpl/header.qtpl:1
package tpl
//line app/vmalert/tpl/header.qtpl:1
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/tpl/header.qtpl:1
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/tpl/header.qtpl:1
func StreamHeader(qw422016 *qt422016.Writer, title string, pages []NavItem) {
//line app/vmalert/tpl/header.qtpl:1
qw422016.N().S(`
<!DOCTYPE html>
<html lang="en">
<head>
<title>vmalert`)
//line app/vmalert/tpl/header.qtpl:5
if title != "" {
//line app/vmalert/tpl/header.qtpl:5
qw422016.N().S(` - `)
//line app/vmalert/tpl/header.qtpl:5
qw422016.E().S(title)
//line app/vmalert/tpl/header.qtpl:5
}
//line app/vmalert/tpl/header.qtpl:5
qw422016.N().S(`</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<style>
body{
min-height: 75rem;
padding-top: 4.5rem;
}
pre {
overflow: scroll;
max-width: 600px;
min-height: 30px;
}
.group-heading {
cursor: pointer;
padding: 5px;
margin-top: 5px;
position: relative;
}
.group-heading .anchor {
position:absolute;
top:-60px;
}
.group-heading span {
float: right;
margin-left: 5px;
margin-right: 5px;
}
.group-heading:hover {
background-color: #f8f9fa!important;
}
.table .error-cell{
word-break: break-word;
}
</style>
</head>
<body>
`)
//line app/vmalert/tpl/header.qtpl:41
StreamPrintNavItems(qw422016, title, pages)
//line app/vmalert/tpl/header.qtpl:41
qw422016.N().S(`
<main class="px-2">
`)
//line app/vmalert/tpl/header.qtpl:43
}
//line app/vmalert/tpl/header.qtpl:43
func WriteHeader(qq422016 qtio422016.Writer, title string, pages []NavItem) {
//line app/vmalert/tpl/header.qtpl:43
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/tpl/header.qtpl:43
StreamHeader(qw422016, title, pages)
//line app/vmalert/tpl/header.qtpl:43
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/tpl/header.qtpl:43
}
//line app/vmalert/tpl/header.qtpl:43
func Header(title string, pages []NavItem) string {
//line app/vmalert/tpl/header.qtpl:43
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/tpl/header.qtpl:43
WriteHeader(qb422016, title, pages)
//line app/vmalert/tpl/header.qtpl:43
qs422016 := string(qb422016.B)
//line app/vmalert/tpl/header.qtpl:43
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/tpl/header.qtpl:43
return qs422016
//line app/vmalert/tpl/header.qtpl:43
}

25
app/vmalert/tpl/nav.qtpl Normal file
View File

@@ -0,0 +1,25 @@
{% code
// NavItem is a single link of the navigation bar rendered by PrintNavItems.
type NavItem struct {
// Name is the link text. PrintNavItems marks the item as active
// when Name equals the `current` argument.
Name string
// Url is the value of the link's href attribute.
Url string
}
%}
PrintNavItems renders the fixed top navigation bar with one link per item.
The item whose Name equals current additionally gets the "active" class.
{% func PrintNavItems(current string, items []NavItem) %}
<nav class="navbar navbar-expand-md navbar-dark fixed-top bg-dark">
  <div class="container-fluid">
    <div class="collapse navbar-collapse" id="navbarCollapse">
        <ul class="navbar-nav me-auto mb-2 mb-md-0">
            {% for _, item := range items  %}
                <li class="nav-item">
                    <a class="nav-link{% if current == item.Name %} active{% endif %}" href="{%s item.Url %}">
                        {%s item.Name %}
                    </a>
                </li>
            {% endfor %}
        </ul>
    </div>
  </div>
</nav>
{% endfunc %}

View File

@@ -0,0 +1,96 @@
// Code generated by qtc from "nav.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/tpl/nav.qtpl:1
package tpl
//line app/vmalert/tpl/nav.qtpl:1
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/tpl/nav.qtpl:1
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/tpl/nav.qtpl:2
type NavItem struct {
Name string
Url string
}
//line app/vmalert/tpl/nav.qtpl:8
func StreamPrintNavItems(qw422016 *qt422016.Writer, current string, items []NavItem) {
//line app/vmalert/tpl/nav.qtpl:8
qw422016.N().S(`
<nav class="navbar navbar-expand-md navbar-dark fixed-top bg-dark">
<div class="container-fluid">
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto mb-2 mb-md-0">
`)
//line app/vmalert/tpl/nav.qtpl:13
for _, item := range items {
//line app/vmalert/tpl/nav.qtpl:13
qw422016.N().S(`
<li class="nav-item">
<a class="nav-link`)
//line app/vmalert/tpl/nav.qtpl:15
if current == item.Name {
//line app/vmalert/tpl/nav.qtpl:15
qw422016.N().S(` active`)
//line app/vmalert/tpl/nav.qtpl:15
}
//line app/vmalert/tpl/nav.qtpl:15
qw422016.N().S(`" href="`)
//line app/vmalert/tpl/nav.qtpl:15
qw422016.E().S(item.Url)
//line app/vmalert/tpl/nav.qtpl:15
qw422016.N().S(`">
`)
//line app/vmalert/tpl/nav.qtpl:16
qw422016.E().S(item.Name)
//line app/vmalert/tpl/nav.qtpl:16
qw422016.N().S(`
</a>
</li>
`)
//line app/vmalert/tpl/nav.qtpl:19
}
//line app/vmalert/tpl/nav.qtpl:19
qw422016.N().S(`
</ul>
</div>
</nav>
`)
//line app/vmalert/tpl/nav.qtpl:23
}
//line app/vmalert/tpl/nav.qtpl:23
func WritePrintNavItems(qq422016 qtio422016.Writer, current string, items []NavItem) {
//line app/vmalert/tpl/nav.qtpl:23
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/tpl/nav.qtpl:23
StreamPrintNavItems(qw422016, current, items)
//line app/vmalert/tpl/nav.qtpl:23
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/tpl/nav.qtpl:23
}
//line app/vmalert/tpl/nav.qtpl:23
func PrintNavItems(current string, items []NavItem) string {
//line app/vmalert/tpl/nav.qtpl:23
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/tpl/nav.qtpl:23
WritePrintNavItems(qb422016, current, items)
//line app/vmalert/tpl/nav.qtpl:23
qs422016 := string(qb422016.B)
//line app/vmalert/tpl/nav.qtpl:23
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/tpl/nav.qtpl:23
return qs422016
//line app/vmalert/tpl/nav.qtpl:23
}

18
app/vmalert/utils/auth.go Normal file
View File

@@ -0,0 +1,18 @@
package utils
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
// AuthConfig builds a promauth.Config from the given basic auth and bearer token params.
//
// Basic auth settings are passed to promauth.NewConfig only when at least one
// of baUser, baPass or baFile is non-empty; otherwise a nil basic auth config is passed.
// bearerToken and bearerTokenFile are always forwarded as is.
// The error returned by promauth.NewConfig is returned unchanged.
func AuthConfig(baUser, baPass, baFile, bearerToken, bearerTokenFile string) (*promauth.Config, error) {
	var basicAuth *promauth.BasicAuthConfig
	noBasicAuth := baUser == "" && baPass == "" && baFile == ""
	if !noBasicAuth {
		basicAuth = new(promauth.BasicAuthConfig)
		basicAuth.Username = baUser
		basicAuth.Password = promauth.NewSecret(baPass)
		basicAuth.PasswordFile = baFile
	}
	// NOTE(review): the first argument "." is presumably the base dir for resolving
	// file paths - confirm against promauth.NewConfig.
	cfg, err := promauth.NewConfig(".", nil, basicAuth, bearerToken, bearerTokenFile, nil, nil)
	return cfg, err
}

View File

@@ -4,32 +4,63 @@ import (
"encoding/json"
"fmt"
"net/http"
"path"
"sort"
"strconv"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
)
var (
once = sync.Once{}
apiLinks [][2]string
navItems []tpl.NavItem
)
// initLinks fills apiLinks and navItems with urls built from the path prefix
// returned by httpserver.GetPathPrefix.
//
// All the generated in-app urls are absolute paths, so they resolve to the same
// target regardless of the page they are rendered on.
func initLinks() {
	pathPrefix := httpserver.GetPathPrefix()
	if pathPrefix == "" {
		// path.Join("", "groups") returns the relative path "groups", which the
		// browser resolves against the current page (e.g. /<group>/<alert>/groups
		// on the alert status page), and an empty href points to the current page
		// instead of the root. Anchor everything at "/" when no prefix is set.
		pathPrefix = "/"
	}
	apiLinks = [][2]string{
		{path.Join(pathPrefix, "api/v1/groups"), "list all loaded groups and rules"},
		{path.Join(pathPrefix, "api/v1/alerts"), "list all active alerts"},
		{path.Join(pathPrefix, "api/v1/groupID/alertID/status"), "get alert status by ID"},
		{path.Join(pathPrefix, "flags"), "command-line flags"},
		{path.Join(pathPrefix, "metrics"), "list of application metrics"},
		{path.Join(pathPrefix, "-/reload"), "reload configuration"},
	}
	navItems = []tpl.NavItem{
		{Name: "vmalert", Url: pathPrefix},
		{Name: "Groups", Url: path.Join(pathPrefix, "groups")},
		{Name: "Alerts", Url: path.Join(pathPrefix, "alerts")},
		{Name: "Docs", Url: "https://docs.victoriametrics.com/vmalert.html"},
	}
}
type requestHandler struct {
m *manager
}
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
once.Do(func() {
initLinks()
})
switch r.URL.Path {
case "/":
if r.Method != "GET" {
return false
}
httpserver.WriteAPIHelp(w, [][2]string{
{"/api/v1/groups", "list all loaded groups and rules"},
{"/api/v1/alerts", "list all active alerts"},
{"/api/v1/groupID/alertID/status", "get alert status by ID"},
{"/metrics", "list of application metrics"},
{"/-/reload", "reload configuration"},
})
WriteWelcome(w)
return true
case "/alerts":
WriteListAlerts(w, rh.groupAlerts())
return true
case "/groups":
WriteListGroups(w, rh.groups())
return true
case "/api/v1/groups":
data, err := rh.listGroups()
@@ -58,14 +89,26 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
if !strings.HasSuffix(r.URL.Path, "/status") {
return false
}
// /api/v1/<groupName>/<alertID>/status
data, err := rh.alert(r.URL.Path)
alert, err := rh.alertByPath(strings.TrimPrefix(r.URL.Path, "/api/v1/"))
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Write(data)
// /api/v1/<groupID>/<alertID>/status
if strings.HasPrefix(r.URL.Path, "/api/v1/") {
data, err := json.Marshal(alert)
if err != nil {
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Write(data)
return true
}
// <groupID>/<alertID>/status
WriteAlert(w, alert)
return true
}
}
@@ -77,20 +120,25 @@ type listGroupsResponse struct {
Status string `json:"status"`
}
func (rh *requestHandler) listGroups() ([]byte, error) {
func (rh *requestHandler) groups() []APIGroup {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
lr := listGroupsResponse{Status: "success"}
var groups []APIGroup
for _, g := range rh.m.groups {
lr.Data.Groups = append(lr.Data.Groups, g.toAPI())
groups = append(groups, g.toAPI())
}
// sort list of alerts for deterministic output
sort.Slice(lr.Data.Groups, func(i, j int) bool {
return lr.Data.Groups[i].Name < lr.Data.Groups[j].Name
sort.Slice(groups, func(i, j int) bool {
return groups[i].Name < groups[j].Name
})
return groups
}
func (rh *requestHandler) listGroups() ([]byte, error) {
lr := listGroupsResponse{Status: "success"}
lr.Data.Groups = rh.groups()
b, err := json.Marshal(lr)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
@@ -108,6 +156,30 @@ type listAlertsResponse struct {
Status string `json:"status"`
}
// groupAlerts returns the alerts of every loaded group, one entry per group
// which has at least one alert. Groups without alerts are omitted.
//
// The result is sorted by group name (ties are broken by group ID), so the
// alerts page lists groups in the same order on every render. This mirrors
// groups(), which sorts its result for deterministic output as well.
func (rh *requestHandler) groupAlerts() []GroupAlerts {
	rh.m.groupsMu.RLock()
	defer rh.m.groupsMu.RUnlock()

	var groupAlerts []GroupAlerts
	for _, g := range rh.m.groups {
		var alerts []*APIAlert
		for _, r := range g.Rules {
			a, ok := r.(*AlertingRule)
			if !ok {
				// skip rules which aren't alerting rules
				continue
			}
			alerts = append(alerts, a.AlertsAPI()...)
		}
		if len(alerts) == 0 {
			continue
		}
		groupAlerts = append(groupAlerts, GroupAlerts{
			Group:  g.toAPI(),
			Alerts: alerts,
		})
	}
	sort.Slice(groupAlerts, func(i, j int) bool {
		a, b := &groupAlerts[i].Group, &groupAlerts[j].Group
		if a.Name != b.Name {
			return a.Name < b.Name
		}
		return a.ID < b.ID
	})
	return groupAlerts
}
func (rh *requestHandler) listAlerts() ([]byte, error) {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
@@ -138,18 +210,17 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
return b, nil
}
func (rh *requestHandler) alert(path string) ([]byte, error) {
func (rh *requestHandler) alertByPath(path string) (*APIAlert, error) {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
parts := strings.SplitN(strings.TrimPrefix(path, "/api/v1/"), "/", 3)
parts := strings.SplitN(strings.TrimLeft(path, "/"), "/", 3)
if len(parts) != 3 {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`path %q cointains /status suffix but doesn't match pattern "/group/alert/status"`, path),
Err: fmt.Errorf(`path %q cointains /status suffix but doesn't match pattern "/groupID/alertID/status"`, path),
StatusCode: http.StatusBadRequest,
}
}
groupID, err := uint64FromPath(parts[0])
if err != nil {
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %w`, err))
@@ -162,7 +233,7 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
if err != nil {
return nil, errResponse(err, http.StatusNotFound)
}
return json.Marshal(resp)
return resp, nil
}
func uint64FromPath(path string) (uint64, error) {

305
app/vmalert/web.qtpl Normal file
View File

@@ -0,0 +1,305 @@
{% package main %}
{% import (
"time"
"sort"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
) %}
Welcome renders the index page: the common header, the list of links from apiLinks
(each shown as a link followed by its description) and the common footer.
{% func Welcome() %}
{%= tpl.Header("vmalert", navItems) %}
<p>
API:<br>
{% for _, link := range apiLinks %}
{%code
href, description := link[0], link[1]
%}
<a href="{%s href %}">{%s href %}</a> - {%s description %}<br/>
{% endfor %}
</p>
{%= tpl.Footer() %}
{% endfunc %}
ListGroups renders the "Groups" page: one collapsible section per group with
a table of its alerting and recording rules. Groups and rules whose last
execution ended with an error are highlighted with the "alert-danger" class.
{% func ListGroups(groups []APIGroup) %}
    {%= tpl.Header("Groups", navItems) %}
    {% if len(groups) > 0 %}
        {%code
            // rOk and rNotOk count rules without and with LastError per group.
            // They are keyed by group ID - the same key the DOM ids below use -
            // so groups sharing a name don't get their counters merged.
            rOk := make(map[string]int)
            rNotOk := make(map[string]int)
            for _, g := range groups {
                for _, r := range g.AlertingRules {
                    if r.LastError != "" {
                        rNotOk[g.ID]++
                    } else {
                        rOk[g.ID]++
                    }
                }
                for _, r := range g.RecordingRules {
                    if r.LastError != "" {
                        rNotOk[g.ID]++
                    } else {
                        rOk[g.ID]++
                    }
                }
            }
        %}
        <a class="btn btn-primary" role="button" onclick="collapseAll()">Collapse All</a>
        <a class="btn btn-primary" role="button" onclick="expandAll()">Expand All</a>
        {% for _, g := range groups %}
            <div class="group-heading{% if rNotOk[g.ID] > 0 %} alert-danger{% endif %}"  data-bs-target="rules-{%s g.ID %}">
                <span class="anchor" id="group-{%s g.ID %}"></span>
                <a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%s g.Interval %})</a>
                {% if rNotOk[g.ID] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d rNotOk[g.ID] %}</span> {% endif %}
                <span class="badge bg-success" title="Number of rules with status Ok">{%d rOk[g.ID] %}</span>
                <p class="fs-6 fw-lighter">{%s g.File %}</p>
                {% if len(g.ExtraFilterLabels) > 0 %}
                    <div class="fs-6 fw-lighter">Extra filter labels
                    {% for k, v := range g.ExtraFilterLabels %}
                            <span class="float-left badge bg-primary">{%s k %}={%s v %}</span>
                    {% endfor %}
                    </div>
                {% endif %}
            </div>
            <div class="collapse" id="rules-{%s g.ID %}">
                <table class="table table-striped table-hover table-sm">
                    <thead>
                        <tr>
                            <th scope="col">Rule</th>
                            <th scope="col" title="Shows if rule's execution ended with error">Error</th>
                            <th scope="col" title="How many samples were produced by the rule">Samples</th>
                            <th scope="col" title="How many seconds ago rule was executed">Updated</th>
                        </tr>
                    </thead>
                    <tbody>
                    {% for _, ar := range g.AlertingRules %}
                        <tr{% if ar.LastError != "" %} class="alert-danger"{% endif %}>
                            <td>
                                <b>alert:</b> {%s ar.Name %} (for: {%v ar.For %})<br>
                                <code><pre>{%s ar.Expression %}</pre></code><br>
                                {% if len(ar.Labels) > 0 %} <b>Labels:</b>{% endif %}
                                {% for k, v := range ar.Labels %}
                                        <span class="ms-1 badge bg-primary">{%s k %}={%s v %}</span>
                                {% endfor %}
                            </td>
                            <td><div class="error-cell">{%s ar.LastError %}</div></td>
                            <td>{%d ar.LastSamples %}</td>
                            <td>{%f.3 time.Since(ar.LastExec).Seconds() %}s ago</td>
                        </tr>
                    {% endfor %}
                    {% for _, rr := range g.RecordingRules  %}
                        <tr{% if rr.LastError != "" %} class="alert-danger"{% endif %}>
                            <td>
                                <b>record:</b> {%s rr.Name %}<br>
                                <code><pre>{%s rr.Expression %}</pre></code>
                                {% if len(rr.Labels) > 0 %} <b>Labels:</b>{% endif %}
                                {% for k, v := range rr.Labels %}
                                        <span class="ms-1 badge bg-primary">{%s k %}={%s v %}</span>
                                {% endfor %}
                            </td>
                            <td><div class="error-cell">{%s rr.LastError %}</div></td>
                            <td>{%d rr.LastSamples %}</td>
                            <td>{%f.3 time.Since(rr.LastExec).Seconds() %}s ago</td>
                        </tr>
                    {% endfor %}
                    </tbody>
                </table>
            </div>
        {% endfor %}
    {% else %}
        <div>
            <p>No items...</p>
        </div>
    {% endif %}
    {%= tpl.Footer() %}
{% endfunc %}
ListAlerts renders the "Alerts" page: one collapsible section per entry of groupAlerts.
Inside a section the alerts are bucketed by RuleID (buckets are ordered by sorted RuleID);
every bucket shows the name, source link and expression taken from its first alert,
followed by a table with one row per alert. Label columns follow the sorted label keys
of the first alert of the bucket.
NOTE(review): the "Details" link is anchored at "/" and doesn't use the path prefix
applied to navItems in initLinks - confirm this is intended.
{% func ListAlerts(groupAlerts []GroupAlerts) %}
{%= tpl.Header("Alerts", navItems) %}
{% if len(groupAlerts) > 0 %}
<a class="btn btn-primary" role="button" onclick="collapseAll()">Collapse All</a>
<a class="btn btn-primary" role="button" onclick="expandAll()">Expand All</a>
{% for _, ga := range groupAlerts %}
{%code g := ga.Group %}
<div class="group-heading alert-danger" data-bs-target="rules-{%s g.ID %}">
<span class="anchor" id="group-{%s g.ID %}"></span>
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
<br>
<p class="fs-6 fw-lighter">{%s g.File %}</p>
</div>
{%code
var keys []string
alertsByRule := make(map[string][]*APIAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
}
alertsByRule[alert.RuleID] = append(alertsByRule[alert.RuleID], alert)
}
sort.Strings(keys)
%}
<div class="collapse" id="rules-{%s g.ID %}">
{% for _, ruleID := range keys %}
{%code
defaultAR := alertsByRule[ruleID][0]
var labelKeys []string
for k := range defaultAR.Labels {
labelKeys = append(labelKeys, k)
}
sort.Strings(labelKeys)
%}
<br>
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
<br>
<b>expr:</b><code><pre>{%s defaultAR.Expression %}</pre></code>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col">Labels</th>
<th scope="col">State</th>
<th scope="col">Active at</th>
<th scope="col">Value</th>
<th scope="col">Link</th>
</tr>
</thead>
<tbody>
{% for _, ar := range alertsByRule[ruleID] %}
<tr>
<td>
{% for _, k := range labelKeys %}
<span class="ms-1 badge bg-primary">{%s k %}={%s ar.Labels[k] %}</span>
{% endfor %}
</td>
<td>{%= badgeState(ar.State) %}</td>
<td>
{%s ar.ActiveAt.Format("2006-01-02T15:04:05Z07:00") %}
{% if ar.Restored %}{%= badgeRestored() %}{% endif %}
</td>
<td>{%s ar.Value %}</td>
<td>
<a href="/{%s g.ID %}/{%s ar.ID %}/status">Details</a>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endfor %}
</div>
<br>
{% endfor %}
{% else %}
<div>
<p>No items...</p>
</div>
{% endif %}
{%= tpl.Footer() %}
{% endfunc %}
Alert renders the details page of a single alert: its name with a state badge
("bg-danger" for the "firing" state, warning colours otherwise), activation time,
expression, labels and annotations (both ordered by sorted key), a link to the
owning group on the groups page and the alert's source link.
NOTE(review): the group link is anchored at "/groups" and doesn't use the path prefix
applied to navItems in initLinks - confirm this is intended.
{% func Alert(alert *APIAlert) %}
{%= tpl.Header("", navItems) %}
{%code
var labelKeys []string
for k := range alert.Labels {
labelKeys = append(labelKeys, k)
}
sort.Strings(labelKeys)
var annotationKeys []string
for k := range alert.Annotations {
annotationKeys = append(annotationKeys, k)
}
sort.Strings(annotationKeys)
%}
<div class="display-6 pb-3 mb-3">{%s alert.Name %}<span class="ms-2 badge {% if alert.State=="firing" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s alert.State %}</span></div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Active at
</div>
<div class="col">
{%s alert.ActiveAt.Format("2006-01-02T15:04:05Z07:00") %}
</div>
</div>
</div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Expr
</div>
<div class="col">
<code><pre>{%s alert.Expression %}</pre></code>
</div>
</div>
</div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Labels
</div>
<div class="col">
{% for _, k := range labelKeys %}
<span class="m-1 badge bg-primary">{%s k %}={%s alert.Labels[k] %}</span>
{% endfor %}
</div>
</div>
</div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Annotations
</div>
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p>{%s alert.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
</div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Group
</div>
<div class="col">
<a target="_blank" href="/groups#group-{%s alert.GroupID %}">{%s alert.GroupID %}</a>
</div>
</div>
</div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Source link
</div>
<div class="col">
<a target="_blank" href="{%s alert.SourceLink %}">Link</a>
</div>
</div>
</div>
{%= tpl.Footer() %}
{% endfunc %}
badgeState renders the given alert state as a badge: "bg-danger" for the "firing" state
and "bg-warning text-dark" for any other state.
{% func badgeState(state string) %}
{%code
var cls string
switch state {
case "firing":
cls = "bg-danger"
default:
cls = "bg-warning text-dark"
}
%}
<span class="badge {%s cls %}">{%s state %}</span>
{% endfunc %}
badgeRestored renders the static "restored" badge, which ListAlerts shows next to
the activation time of alerts with the Restored flag set.
{% func badgeRestored() %}
<span class="badge bg-warning text-dark" title="Alert state was restored after the service restart from remote storage">restored</span>
{% endfunc %}

1018
app/vmalert/web.qtpl.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@ import (
type APIAlert struct {
ID string `json:"id"`
Name string `json:"name"`
RuleID string `json:"rule_id"`
GroupID string `json:"group_id"`
Expression string `json:"expression"`
State string `json:"state"`
@@ -16,6 +17,8 @@ type APIAlert struct {
Labels map[string]string `json:"labels"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
SourceLink string `json:"source"`
Restored bool `json:"restored"`
}
// APIGroup represents Group for WEB view
@@ -27,6 +30,7 @@ type APIGroup struct {
Interval string `json:"interval"`
Concurrency int `json:"concurrency"`
ExtraFilterLabels map[string]string `json:"extra_filter_labels"`
Labels map[string]string `json:"labels,omitempty"`
AlertingRules []APIAlertingRule `json:"alerting_rules"`
RecordingRules []APIRecordingRule `json:"recording_rules"`
}
@@ -58,3 +62,9 @@ type APIRecordingRule struct {
LastExec time.Time `json:"last_exec"`
Labels map[string]string `json:"labels"`
}
// GroupAlerts represents a group of alerts for WEB view
type GroupAlerts struct {
Group APIGroup
Alerts []*APIAlert
}

View File

@@ -37,9 +37,8 @@ Each `url_prefix` in the [-auth.config](#auth-config) may contain either a singl
`-auth.config` is represented in the following simple `yml` format:
```yml
# Arbitrary number of usernames may be put here.
# Usernames must be unique.
# Username and bearer_token values must be unique.
users:
# Requests with the 'Authorization: Bearer XXXX' header are proxied to http://localhost:8428 .
@@ -47,6 +46,14 @@ users:
- bearer_token: "XXXX"
url_prefix: "http://localhost:8428"
# Requests with the 'Authorization: Bearer YYY' header are proxied to http://localhost:8428 .
# The `X-Scope-OrgID: foobar` http header is added to every proxied request.
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
- bearer_token: "YYY"
url_prefix: "http://localhost:8428"
headers:
- "X-Scope-OrgID: foobar"
# The user for querying local single-node VictoriaMetrics.
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# will be proxied to http://localhost:8428 .
@@ -89,7 +96,6 @@ users:
- "http://vminsert1:8480/insert/42/prometheus"
- "http://vminsert2:8480/insert/42/prometheus"
# A single user for querying and inserting data:
# - Requests to http://vmauth:8427/api/v1/query, http://vmauth:8427/api/v1/query_range
# and http://vmauth:8427/api/v1/label/<label_name>/values are proxied to the following urls in a round-robin manner:
@@ -97,7 +103,8 @@ users:
# - http://vmselect2:8481/select/42/prometheus
# For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8481/select/42/prometheus/api/v1/query
# or to http://vmselect2:8481/select/42/prometheus/api/v1/query .
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write .
# The "X-Scope-OrgID: abc" http header is added to these requests.
- username: "foobar"
url_map:
- src_paths:
@@ -109,6 +116,8 @@ users:
- "http://vmselect2:8481/select/42/prometheus"
- src_paths: ["/api/v1/write"]
url_prefix: "http://vminsert:8480/insert/42/prometheus"
headers:
- "X-Scope-OrgID: abc"
```
The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
@@ -230,6 +239,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
TCP address to listen for http connections (default ":8427")
-logInvalidAuthTokens
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int

View File

@@ -27,24 +27,53 @@ var (
// AuthConfig represents auth config.
type AuthConfig struct {
Users []UserInfo `yaml:"users"`
Users []UserInfo `yaml:"users,omitempty"`
}
// UserInfo is user information read from authConfigPath
type UserInfo struct {
BearerToken string `yaml:"bearer_token"`
Username string `yaml:"username"`
Password string `yaml:"password"`
URLPrefix *URLPrefix `yaml:"url_prefix"`
URLMap []URLMap `yaml:"url_map"`
BearerToken string `yaml:"bearer_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
URLMap []URLMap `yaml:"url_map,omitempty"`
Headers []Header `yaml:"headers,omitempty"`
requests *metrics.Counter
}
// Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct {
Name string
Value string
}
// UnmarshalYAML unmarshals h from f.
func (h *Header) UnmarshalYAML(f func(interface{}) error) error {
var s string
if err := f(&s); err != nil {
return err
}
n := strings.IndexByte(s, ':')
if n < 0 {
return fmt.Errorf("missing speparator char ':' between Name and Value in the header %q; expected format - 'Name: Value'", s)
}
h.Name = strings.TrimSpace(s[:n])
h.Value = strings.TrimSpace(s[n+1:])
return nil
}
// MarshalYAML marshals h to yaml.
func (h *Header) MarshalYAML() (interface{}, error) {
s := fmt.Sprintf("%s: %s", h.Name, h.Value)
return s, nil
}
// URLMap is a mapping from source paths to target urls.
type URLMap struct {
SrcPaths []*SrcPath `yaml:"src_paths"`
URLPrefix *URLPrefix `yaml:"url_prefix"`
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
Headers []Header `yaml:"headers,omitempty"`
}
// SrcPath represents an src path

View File

@@ -69,6 +69,14 @@ users:
- [foo]
`)
// Invalid headers
f(`
users:
- username: foo
url_prefix: http://foo.bar
headers: foobar
`)
// empty url_prefix
f(`
users:
@@ -156,6 +164,27 @@ users:
- src_paths: ['fo[obar']
url_prefix: http://foobar
`)
// Invalid headers in url_map (missing ':')
f(`
users:
- username: a
url_map:
- src_paths: ['/foobar']
url_prefix: http://foobar
headers:
- foobar
`)
// Invalid headers in url_map (dictionary instead of array)
f(`
users:
- username: a
url_map:
- src_paths: ['/foobar']
url_prefix: http://foobar
headers:
aaa: bbb
`)
}
func TestParseAuthConfigSuccess(t *testing.T) {
@@ -231,6 +260,9 @@ users:
url_prefix: http://vmselect/select/0/prometheus
- src_paths: ["/api/v1/write"]
url_prefix: ["http://vminsert1/insert/0/prometheus","http://vminsert2/insert/0/prometheus"]
headers:
- "foo: bar"
- "xxx: y"
`, map[string]*UserInfo{
getAuthToken("foo", "", ""): {
BearerToken: "foo",
@@ -245,6 +277,16 @@ users:
"http://vminsert1/insert/0/prometheus",
"http://vminsert2/insert/0/prometheus",
}),
Headers: []Header{
{
Name: "foo",
Value: "bar",
},
{
Name: "xxx",
Value: "y",
},
},
},
},
},

View File

@@ -1,5 +1,5 @@
# Arbitrary number of usernames may be put here.
# Usernames must be unique.
# Username and bearer_token values must be unique.
users:
# Requests with the 'Authorization: Bearer XXXX' header are proxied to http://localhost:8428 .
@@ -7,6 +7,14 @@ users:
- bearer_token: "XXXX"
url_prefix: "http://localhost:8428"
# Requests with the 'Authorization: Bearer YYY' header are proxied to http://localhost:8428 .
# The `X-Scope-OrgID: foobar` http header is added to every proxied request.
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
- bearer_token: "YYY"
url_prefix: "http://localhost:8428"
headers:
- "X-Scope-OrgID: foobar"
# The user for querying local single-node VictoriaMetrics.
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# will be proxied to http://localhost:8428 .
@@ -49,7 +57,6 @@ users:
- "http://vminsert1:8480/insert/42/prometheus"
- "http://vminsert2:8480/insert/42/prometheus"
# A single user for querying and inserting data:
# - Requests to http://vmauth:8427/api/v1/query, http://vmauth:8427/api/v1/query_range
# and http://vmauth:8427/api/v1/label/<label_name>/values are proxied to the following urls in a round-robin manner:
@@ -57,7 +64,8 @@ users:
# - http://vmselect2:8481/select/42/prometheus
# For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8480/select/42/prometheus/api/v1/query
# or to http://vmselect2:8480/select/42/prometheus/api/v1/query .
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write .
# The "X-Scope-OrgID: abc" http header is added to these requests.
- username: "foobar"
url_map:
- src_paths:
@@ -69,3 +77,5 @@ users:
- "http://vmselect2:8481/select/42/prometheus"
- src_paths: ["/api/v1/write"]
url_prefix: "http://vminsert:8480/insert/42/prometheus"
headers:
- "X-Scope-OrgID: abc"

View File

@@ -2,6 +2,7 @@ package main
import (
"flag"
"fmt"
"net/http"
"net/http/httputil"
"net/url"
@@ -21,6 +22,8 @@ var (
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
)
func main() {
@@ -71,16 +74,25 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
ac := authConfig.Load().(map[string]*UserInfo)
ui := ac[authToken]
if ui == nil {
httpserver.Errorf(w, r, "cannot find the provided auth token %q in config", authToken)
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
httpserver.Errorf(w, r, "cannot find the provided auth token %q in config", authToken)
} else {
errStr := fmt.Sprintf("cannot find the provided auth token %q in config", authToken)
http.Error(w, errStr, http.StatusBadRequest)
}
return true
}
ui.requests.Inc()
targetURL, err := createTargetURL(ui, r.URL)
targetURL, headers, err := createTargetURL(ui, r.URL)
if err != nil {
httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
return true
}
r.Header.Set("vm-target-url", targetURL.String())
for _, h := range headers {
r.Header.Set(h.Name, h.Value)
}
proxyRequest(w, r)
return true
}
@@ -99,7 +111,11 @@ func proxyRequest(w http.ResponseWriter, r *http.Request) {
reverseProxy.ServeHTTP(w, r)
}
var configReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
var (
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
)
var reverseProxy = &httputil.ReverseProxy{
Director: func(r *http.Request) {
@@ -117,6 +133,9 @@ var reverseProxy = &httputil.ReverseProxy{
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
tr.ForceAttemptHTTP2 = false
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
return tr
}(),
FlushInterval: time.Second,

View File

@@ -35,22 +35,24 @@ func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
return &targetURL
}
func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, error) {
func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, []Header, error) {
u := *uOrig
// Prevent from attacks with using `..` in r.URL.Path
u.Path = path.Clean(u.Path)
if !strings.HasPrefix(u.Path, "/") {
u.Path = "/" + u.Path
}
u.Path = strings.TrimSuffix(u.Path, "/")
for _, e := range ui.URLMap {
for _, sp := range e.SrcPaths {
if sp.match(u.Path) {
return e.URLPrefix.mergeURLs(&u), nil
return e.URLPrefix.mergeURLs(&u), e.Headers, nil
}
}
}
if ui.URLPrefix != nil {
return ui.URLPrefix.mergeURLs(&u), nil
return ui.URLPrefix.mergeURLs(&u), ui.Headers, nil
}
return nil, fmt.Errorf("missing route for %q", u.String())
missingRouteRequests.Inc()
return nil, nil, fmt.Errorf("missing route for %q", u.String())
}

View File

@@ -1,44 +1,56 @@
package main
import (
"fmt"
"net/url"
"testing"
)
func TestCreateTargetURLSuccess(t *testing.T) {
f := func(ui *UserInfo, requestURI, expectedTarget string) {
f := func(ui *UserInfo, requestURI, expectedTarget, expectedHeaders string) {
t.Helper()
u, err := url.Parse(requestURI)
if err != nil {
t.Fatalf("cannot parse %q: %s", requestURI, err)
}
target, err := createTargetURL(ui, u)
target, headers, err := createTargetURL(ui, u)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if target.String() != expectedTarget {
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
}
headersStr := fmt.Sprintf("%q", headers)
if headersStr != expectedHeaders {
t.Fatalf("unexpected headers; got %s; want %s", headersStr, expectedHeaders)
}
}
// Simple routing with `url_prefix`
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
}, "", "http://foo.bar/.")
}, "", "http://foo.bar/.", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
}, "/", "http://foo.bar/")
Headers: []Header{{
Name: "bb",
Value: "aaa",
}},
}, "/", "http://foo.bar", `[{"bb" "aaa"}]`)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar/federate"),
}, "/", "http://foo.bar/federate", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
}, "a/b?c=d", "http://foo.bar/a/b?c=d")
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/z", "https://sss:3894/x/y/z")
}, "/z", "https://sss:3894/x/y/z", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/../../aaa", "https://sss:3894/x/y/aaa")
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd")
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]")
// Complex routing with `url_map`
ui := &UserInfo{
@@ -46,6 +58,16 @@ func TestCreateTargetURLSuccess(t *testing.T) {
{
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
Headers: []Header{
{
Name: "xx",
Value: "aa",
},
{
Name: "yy",
Value: "asdf",
},
},
},
{
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
@@ -53,10 +75,14 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
URLPrefix: mustParseURL("http://default-server"),
Headers: []Header{{
Name: "bb",
Value: "aaa",
}},
}
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up")
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write")
f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range")
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`)
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]")
f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`)
// Complex routing regexp paths in `url_map`
ui = &UserInfo{
@@ -72,17 +98,17 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
URLPrefix: mustParseURL("http://default-server"),
}
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up")
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up")
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values")
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write")
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar")
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]")
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]")
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]")
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]")
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev")
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]")
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile")
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]")
}
@@ -93,13 +119,16 @@ func TestCreateTargetURLFailure(t *testing.T) {
if err != nil {
t.Fatalf("cannot parse %q: %s", requestURI, err)
}
target, err := createTargetURL(ui, u)
target, headers, err := createTargetURL(ui, u)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if target != nil {
t.Fatalf("unexpected target=%q; want empty string", target)
}
if headers != nil {
t.Fatalf("unexpected headers=%q; want empty string", headers)
}
}
f(&UserInfo{}, "/foo/bar")
f(&UserInfo{

View File

@@ -4,13 +4,13 @@
Supported storage systems for backups:
* [GCS](https://cloud.google.com/storage/). Example: `gcs://<bucket>/<path/to/backup>`
* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/docs/mimic/radosgw/s3/) or [Swift](https://www.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
* Local filesystem. Example: `fs://</absolute/path/to/backup>`
`vmbackup` supports incremental and full backups. Incremental backups created automatically if the destination path already contains data from the previous backup.
Full backups can be sped up with `-origin` pointing to already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
`vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
data between the existing backup and new backup. It saves time and costs on data transfer.
Backup process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmbackup` with the same args.
@@ -19,7 +19,7 @@ Backed up data can be restored with [vmrestore](https://docs.victoriametrics.com
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
See also [vmbackupmanager](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
See also [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool built on top of `vmbackup`. This tool simplifies
creation of hourly, daily, weekly and monthly backups.
@@ -30,11 +30,11 @@ creation of hourly, daily, weekly and monthly backups.
Regular backup can be performed with the following command:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup>
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gs://<bucket>/<path/to/new/backup>
```
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
There is no need to stop VictoriaMetrics for creating backups, since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
There is no need to stop VictoriaMetrics for creating backups since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
* `<local-snapshot>` is the snapshot to back up. See [how to create instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots). `vmbackup` can create the snapshot by itself if `-snapshot.createURL` command-line flag is set to a URL for creating snapshots. In this case `-snapshotName` flag isn't needed.
* `<bucket>` is an already existing name for [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
* `<path/to/new/backup>` is the destination path where new backup will be placed.
@@ -46,7 +46,7 @@ If the destination GCS bucket already contains the previous backup at `-origin`
with the following command:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup> -origin=gcs://<bucket>/<path/to/existing/backup>
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gs://<bucket>/<path/to/new/backup> -origin=gs://<bucket>/<path/to/existing/backup>
```
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
@@ -54,11 +54,11 @@ It saves time and network bandwidth costs by performing server-side copy for the
### Incremental backups
Incremental backups performed if `-dst` points to an already existing backup. In this case only new data uploaded to remote storage.
Incremental backups are performed if `-dst` points to an already existing backup. In this case only new data is uploaded to remote storage.
It saves time and network bandwidth costs when working with big backups:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/existing/backup>
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gs://<bucket>/<path/to/existing/backup>
```
@@ -69,16 +69,16 @@ Smart backups mean storing full daily backups into `YYYYMMDD` folders and creati
* Run the following command every hour:
```
vmbackup -snapshotName=<latest-snapshot> -dst=gcs://<bucket>/latest
vmbackup -snapshotName=<latest-snapshot> -dst=gs://<bucket>/latest
```
Where `<latest-snapshot>` is the latest [snapshot](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
The command will upload only changed data to `gcs://<bucket>/latest`.
The command will upload only changed data to `gs://<bucket>/latest`.
* Run the following command once a day:
```
vmbackup -snapshotName=<daily-snapshot> -dst=gcs://<bucket>/<YYYYMMDD> -origin=gcs://<bucket>/latest
vmbackup -snapshotName=<daily-snapshot> -dst=gs://<bucket>/<YYYYMMDD> -origin=gs://<bucket>/latest
```
Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
@@ -87,9 +87,9 @@ Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
This approach saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (`latest` backup)
or from any day (`YYYYMMDD` backups). Note that hourly backup shouldn't run when creating daily backup.
Do not forget removing old snapshots and backups when they are no longer needed for saving storage costs.
Do not forget to remove old snapshots and backups when they are no longer needed in order to save storage costs.
See also [vmbackupmanager tool](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for automating smart backups.
See also [vmbackupmanager tool](https://docs.victoriametrics.com/vmbackupmanager.html) for automating smart backups.
## How does it work?
@@ -97,19 +97,19 @@ See also [vmbackupmanager tool](https://github.com/VictoriaMetrics/VictoriaMetri
The backup algorithm is the following:
1. Collect information about files in the `-snapshotName`, in the `-dst` and in the `-origin`.
2. Determine files in `-dst`, which are missing in `-snapshotName`, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
3. Determine files from `-snapshotName`, which are missing in `-dst`. These are usually small new files and bigger merged files.
4. Determine files from step 3, which exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
2. Determine which files in `-dst` are missing in `-snapshotName`, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
3. Determine which files in `-snapshotName` are missing in `-dst`. These are usually small new files and bigger merged files.
4. Determine which files from step 3 exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
These are usually the biggest and the oldest files, which are shared between backups.
5. Upload the remaining files from step 3 from `-snapshotName` to `-dst`.
The algorithm splits source files into 100 MB chunks in the backup. Each chunk stored as a separate file in the backup.
The algorithm splits source files into 1 GiB chunks in the backup. Each chunk is stored as a separate file in the backup.
Such splitting minimizes the amounts of data to re-transfer after temporary errors.
`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
- All the files in the snapshot are immutable.
- Old files periodically merged into new files.
- Old files are periodically merged into new files.
- Smaller files have a higher probability of being merged.
- Consecutive snapshots share many identical files.
@@ -183,7 +183,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-dst string
Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
-envflag.enable
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details

View File

@@ -25,7 +25,7 @@ var (
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. "+
"All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete")
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
"Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce backup duration")

View File

@@ -1,6 +1,6 @@
## vmbackupmanager
***vmbackupmanager is a part of [enterprise package](https://victoriametrics.com/enterprise.html)***
***vmbackupmanager is a part of [enterprise package](https://victoriametrics.com/enterprise.html). It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)***
The VictoriaMetrics backup manager automates regular backup procedures. It supports the following backup intervals: **hourly**, **daily**, **weekly** and **monthly**. Multiple backup intervals may be configured simultaneously. I.e. the backup manager creates hourly backups every hour, while it creates daily backups every day, etc. Backup manager must have read access to the storage data, so best practice is to install it on the same machine (or as a sidecar) where the storage node is installed.
The backup service makes a backup every hour and puts it to the latest folder and then copies data to the folders which represent the backup intervals (hourly, daily, weekly and monthly)
@@ -76,7 +76,7 @@ Backup manager launched with the following configuration:
```console
export NODE_IP=192.168.0.10
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
./vmbackupmanager -dst=gcs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -eula
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -eula
```
Expected logs in vmbackupmanager:
@@ -98,11 +98,11 @@ The result on the GCS bucket
- The root folder
![root](root.png)
![root](vmbackupmanager_root_folder.png)
- The latest folder
![latest](latest.png)
![latest](vmbackupmanager_latest_folder.png)
## Backup Retention Policy
@@ -117,7 +117,7 @@ Backup retention policy is controlled by:
Let's assume we have a backup manager collecting daily backups for the past 10 days.
![daily](rp_daily_1.png)
![daily](vmbackupmanager_rp_daily_1.png)
We enable backup retention policy for backup manager by using the following configuration:
@@ -125,7 +125,7 @@ We enable backup retention policy for backup manager by using following configur
```console
export NODE_IP=192.168.0.10
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
./vmbackupmanager -dst=gcs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create
-keepLastDaily=3 -eula
```
@@ -143,4 +143,4 @@ info app/vmbackupmanager/retention.go:106 daily backups to delete [daily/2
The result on the GCS bucket. We see only 3 daily backups:
![daily](rp_daily_2.png)
![daily](vmbackupmanager_rp_daily_2.png)

View File

Before

Width:  |  Height:  |  Size: 29 KiB

After

Width:  |  Height:  |  Size: 29 KiB

View File

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

Before

Width:  |  Height:  |  Size: 99 KiB

After

Width:  |  Height:  |  Size: 99 KiB

View File

Before

Width:  |  Height:  |  Size: 64 KiB

After

Width:  |  Height:  |  Size: 64 KiB

View File

@@ -10,56 +10,63 @@ Features:
- [x] OpenTSDB: migrate data from OpenTSDB to VictoriaMetrics
- [ ] Storage Management: data re-balancing between nodes
vmctl acts as a proxy between data source ([Prometheus](#migrating-data-from-prometheus),
[InfluxDB](#migrating-data-from-influxdb-1x), [VictoriaMetrics](#migrating-data-from-victoriametrics), etc.)
and destination - VictoriaMetrics single or cluster version. To see the full list of supported modes
run the following command:
```
./vmctl --help
NAME:
vmctl - VictoriaMetrics command-line tool
USAGE:
vmctl [global options] command [command options] [arguments...]
COMMANDS:
opentsdb Migrate timeseries from OpenTSDB
influx Migrate timeseries from InfluxDB
prometheus Migrate timeseries from Prometheus
vm-native Migrate time series between VictoriaMetrics installations via native binary format
```
Each mode has its own unique set of flags specific to the data source (e.g. prefixed with `influx` for influx mode)
and a common list of flags for the destination (prefixed with `vm` for VictoriaMetrics):
```
./vmctl influx --help
OPTIONS:
--influx-addr value InfluxDB server addr (default: "http://localhost:8086")
--influx-user value InfluxDB user [$INFLUX_USERNAME]
...
--vm-addr vmctl VictoriaMetrics address to perform import requests.
Should be the same as --httpListenAddr value for single-node version or vminsert component.
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
Please note, that vmctl performs initial readiness check for the given address by checking `/health` endpoint. (default: "http://localhost:8428")
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
```
When doing a migration, the user needs to specify flags for the source (where and how to fetch data) and for
the destination (where to migrate data). Every mode has additional details and nuances, please see
them below in corresponding sections.
For the destination flags see the full description by running the following command:
```
./vmctl influx --help | grep vm-
```
Some flags like [--vm-extra-label](#adding-extra-labels) or [--vm-significant-figures](#significant-figures)
have additional sections with descriptions below. Details about tweaking and adjusting settings
are explained in [Tuning](#tuning) section.
Please note, that if you're going to import data into VictoriaMetrics cluster do not
forget to specify the `--vm-account-id` flag. See more details for cluster version
[here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
## Articles
* [How to migrate data from Prometheus](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
* [How to migrate data from Prometheus. Filtering and modifying time series](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-filtering-and-modifying-time-series-6d40cea4bf21)
## How to build
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmctl` is located in `vmutils-*` archives there.
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.16.
2. Run `make vmctl` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl` binary and puts it into the `bin` folder.
### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmctl-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-prod` binary and puts it into the `bin` folder.
### Building docker images
Run `make package-vmctl`. It builds `victoriametrics/vmctl:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmctl`.
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
```bash
ROOT_IMAGE=scratch make package-vmctl
```
### ARM build
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
#### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.16.
2. Run `make vmctl-arm` or `make vmctl-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-arm` or `vmctl-arm64` binary respectively and puts it into the `bin` folder.
#### Production ARM build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmctl-arm-prod` or `make vmctl-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-arm-prod` or `vmctl-arm64-prod` binary respectively and puts it into the `bin` folder.
## Migrating data from OpenTSDB
@@ -209,16 +216,16 @@ Found 40000 timeseries to import. Continue? [Y/n] y
### Data mapping
Vmctl maps Influx data the same way as VictoriaMetrics does by using the following rules:
Vmctl maps InfluxDB data the same way as VictoriaMetrics does by using the following rules:
* `influx-database` arg is mapped into `db` label value unless `db` tag exists in the Influx line.
* `influx-database` arg is mapped into `db` label value unless `db` tag exists in the InfluxDB line.
* Field names are mapped to time series names prefixed with {measurement}{separator} value,
where {separator} equals to _ by default.
It can be changed with `--influx-measurement-field-separator` command-line flag.
* Field values are mapped to time series values.
* Tags are mapped to Prometheus labels format as-is.
For example, the following Influx line:
For example, the following InfluxDB line:
```
foo,tag1=value1,tag2=value2 field1=12,field2=40
```
@@ -287,7 +294,7 @@ if flags `--prom-filter-time-start` or `--prom-filter-time-end` were set. The ex
Please note that stats do not take into account timeseries or samples filtering. This will be done during the importing process.
The importing process takes the snapshot blocks revealed from Explore procedure and processes them one by one
accumulating timeseries and samples. Please note, that `vmctl` relies on responses from Influx on this stage,
accumulating timeseries and samples. Please note, that `vmctl` relies on responses from InfluxDB on this stage,
so ensure that Explore queries are executed without errors or limits. Please see this
[issue](https://github.com/VictoriaMetrics/vmctl/issues/30) for details.
The data is processed in chunks and then sent to VM.
@@ -472,11 +479,12 @@ To avoid such situation try to filter out VM process metrics via `--vm-native-fi
[Backfilling tips](https://github.com/VictoriaMetrics/VictoriaMetrics#backfilling) section.
3. `vmctl` doesn't provide relabeling or other types of labels management in this mode.
Instead, use [relabeling in VictoriaMetrics](https://github.com/VictoriaMetrics/vmctl/issues/4#issuecomment-683424375).
4. When importing in or from cluster version remember to use correct [URL format](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format)
and specify `accountID` param.
## Tuning
### Influx mode
### InfluxDB mode
The flag `--influx-concurrency` controls how many concurrent requests may be sent to InfluxDB while fetching
timeseries. Please set it wisely to avoid overwhelming InfluxDB.
@@ -551,4 +559,49 @@ results such as `average`, `rate`, etc.
`vmctl` allows adding extra labels to all imported series. It can be achieved with flag `--vm-extra-label label=value`.
If multiple labels need to be added, set the flag for each label, for example, `--vm-extra-label label1=value1 --vm-extra-label label2=value2`.
If a timeseries already has a label that is also set via the `--vm-extra-label` flag, the flag has priority and overrides the label value from the timeseries.
## How to build
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmctl` is located in `vmutils-*` archives there.
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.16.
2. Run `make vmctl` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl` binary and puts it into the `bin` folder.
### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmctl-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-prod` binary and puts it into the `bin` folder.
### Building docker images
Run `make package-vmctl`. It builds `victoriametrics/vmctl:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmctl`.
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
```bash
ROOT_IMAGE=scratch make package-vmctl
```
### ARM build
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
#### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.16.
2. Run `make vmctl-arm` or `make vmctl-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-arm` or `vmctl-arm64` binary respectively and puts it into the `bin` folder.
#### Production ARM build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmctl-arm-prod` or `make vmctl-arm64-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-arm-prod` or `vmctl-arm64-prod` binary respectively and puts it into the `bin` folder.

View File

@@ -40,6 +40,7 @@ var (
Value: "http://localhost:8428",
Usage: "VictoriaMetrics address to perform import requests. \n" +
"Should be the same as --httpListenAddr value for single-node version or vminsert component. \n" +
"When importing into the clustered version do not forget to set additionally --vm-account-id flag. \n" +
"Please note, that `vmctl` performs initial readiness check for the given address by checking `/health` endpoint.",
},
&cli.StringFlag{
@@ -55,6 +56,7 @@ var (
&cli.StringFlag{
Name: vmAccountID,
Usage: "AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). \n" +
"AccountID is required when importing into the clustered version of VictoriaMetrics. \n" +
"It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer. \n" +
"If projectID isn't set, then it equals to 0",
},
@@ -187,26 +189,26 @@ var (
&cli.StringFlag{
Name: influxAddr,
Value: "http://localhost:8086",
Usage: "Influx server addr",
Usage: "InfluxDB server addr",
},
&cli.StringFlag{
Name: influxUser,
Usage: "Influx user",
Usage: "InfluxDB user",
EnvVars: []string{"INFLUX_USERNAME"},
},
&cli.StringFlag{
Name: influxPassword,
Usage: "Influx user password",
Usage: "InfluxDB user password",
EnvVars: []string{"INFLUX_PASSWORD"},
},
&cli.StringFlag{
Name: influxDB,
Usage: "Influx database",
Usage: "InfluxDB database",
Required: true,
},
&cli.StringFlag{
Name: influxRetention,
Usage: "Influx retention policy",
Usage: "InfluxDB retention policy",
Value: "autogen",
},
&cli.IntFlag{
@@ -221,7 +223,7 @@ var (
},
&cli.StringFlag{
Name: influxFilterSeries,
Usage: "Influx filter expression to select series. E.g. \"from cpu where arch='x86' AND hostname='host_2753'\".\n" +
Usage: "InfluxDB filter expression to select series. E.g. \"from cpu where arch='x86' AND hostname='host_2753'\".\n" +
"See for details https://docs.influxdata.com/influxdb/v1.7/query_language/schema_exploration#show-series",
},
&cli.StringFlag{

View File

@@ -58,6 +58,11 @@ func toFloat64(v interface{}) (float64, error) {
return float64(i), nil
case string:
return strconv.ParseFloat(i, 64)
case bool:
if i {
return 1, nil
}
return 0, nil
default:
return 0, fmt.Errorf("unexpected value type %v", i)
}

View File

@@ -1,6 +1,7 @@
package influx
import (
"encoding/json"
"reflect"
"testing"
)
@@ -58,3 +59,28 @@ func TestSeries_Unmarshal(t *testing.T) {
}
}
}
// TestToFloat64 verifies that toFloat64 converts every supported input type
// to the expected float64 and that it returns an error for a non-numeric string.
func TestToFloat64(t *testing.T) {
	// check fails the test if the conversion errors out or yields an unexpected value.
	check := func(input interface{}, expected float64) {
		t.Helper()
		result, err := toFloat64(input)
		if err != nil {
			t.Fatalf("unexpected err: %s", err)
		}
		if result != expected {
			t.Errorf("got %v; want %v", result, expected)
		}
	}

	// Inputs that must be converted successfully.
	successCases := []struct {
		input    interface{}
		expected float64
	}{
		{"123.4", 123.4},
		{float64(123.4), 123.4},
		{float32(12), 12},
		{123, 123},
		{true, 1},
		{false, 0},
		{json.Number("123456.789"), 123456.789},
	}
	for _, tc := range successCases {
		check(tc.input, tc.expected)
	}

	// A string that isn't a number must produce an error.
	if _, err := toFloat64("text"); err == nil {
		t.Fatalf("expected to get err; got nil instead")
	}
}

View File

@@ -1,6 +1,6 @@
# vmgateway
***vmgateway is a part of [enterprise package](https://victoriametrics.com/enterprise.html)***
***vmgateway is a part of [enterprise package](https://victoriametrics.com/enterprise.html). It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)***
<img alt="vmgateway" src="vmgateway-overview.jpeg">

View File

@@ -0,0 +1,97 @@
package datadog
import (
"fmt"
"net/http"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
// rowsInserted is increased by the number of points handled by every insertRows call.
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="datadog"}`)

// rowsPerInsert records how many points every insertRows call handled.
var rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="datadog"}`)
// InsertHandlerForHTTP handles a DataDog `POST /api/v1/series` request.
//
// Extra labels are read from the request first; the request body is then parsed
// under writeconcurrencylimiter.Do, using the Content-Encoding header value,
// and every parsed batch of series is passed to insertRows.
//
// A parse or insert error is returned wrapped together with the request headers.
//
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
func InsertHandlerForHTTP(req *http.Request) error {
	extraLabels, err := parserCommon.GetExtraLabels(req)
	if err != nil {
		return err
	}

	processBody := func() error {
		contentEncoding := req.Header.Get("Content-Encoding")
		onSeries := func(series []parser.Series) error {
			return insertRows(series, extraLabels)
		}
		if parseErr := parser.ParseStream(req.Body, contentEncoding, onSeries); parseErr != nil {
			return fmt.Errorf("headers: %q; err: %w", req.Header, parseErr)
		}
		return nil
	}
	return writeconcurrencylimiter.Do(processBody)
}
// insertRows converts the given DataDog series into data points and writes them
// through an insert context obtained from common.GetInsertCtx.
//
// Every series gets the following labels: the metric name (stored under the empty
// label name), `host`, one label per DataDog tag and all the extraLabels.
// A tag named `host` is renamed to `exported_host`, so it doesn't clash with
// the `host` label taken from the series itself.
//
// DataDog tags may be either `key:value` pairs or bare values without ':'.
// A bare tag is stored as a label named after the tag with the placeholder value
// "no_label_value" instead of failing the whole request.
//
// It returns an error if a data point cannot be written or if flushing fails.
func insertRows(series []parser.Series, extraLabels []prompbmarshal.Label) error {
	ctx := common.GetInsertCtx()
	defer common.PutInsertCtx(ctx)

	// Pre-size the context for the total number of points across all the series.
	rowsLen := 0
	for i := range series {
		rowsLen += len(series[i].Points)
	}
	ctx.Reset(rowsLen)

	rowsTotal := 0
	hasRelabeling := relabel.HasRelabeling()
	for i := range series {
		ss := &series[i]
		// Points are counted before relabeling, so points of series dropped below are counted too.
		rowsTotal += len(ss.Points)
		ctx.Labels = ctx.Labels[:0]
		ctx.AddLabel("", ss.Metric)
		ctx.AddLabel("host", ss.Host)
		for _, tag := range ss.Tags {
			// Value-only tags are valid in DataDog, so they get a placeholder value.
			name, value := tag, "no_label_value"
			if n := strings.IndexByte(tag, ':'); n >= 0 {
				name, value = tag[:n], tag[n+1:]
			}
			if name == "host" {
				name = "exported_host"
			}
			ctx.AddLabel(name, value)
		}
		for j := range extraLabels {
			label := &extraLabels[j]
			ctx.AddLabel(label.Name, label.Value)
		}
		if hasRelabeling {
			ctx.ApplyRelabeling()
		}
		if len(ctx.Labels) == 0 {
			// Skip metric without labels.
			continue
		}
		ctx.SortLabelsIfNeeded()
		// metricNameRaw is passed back into every WriteDataPointExt call for the same series.
		var metricNameRaw []byte
		var err error
		for _, pt := range ss.Points {
			timestamp := pt.Timestamp()
			value := pt.Value()
			metricNameRaw, err = ctx.WriteDataPointExt(metricNameRaw, ctx.Labels, timestamp, value)
			if err != nil {
				return err
			}
		}
	}
	rowsInserted.Add(rowsTotal)
	rowsPerInsert.Update(float64(rowsTotal))
	return ctx.FlushBufs()
}

View File

@@ -20,8 +20,8 @@ import (
)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field")
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
)

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/native"
@@ -35,12 +36,13 @@ import (
var (
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
"This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superfluous labels are dropped")
)
@@ -155,6 +157,36 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
influxQueryRequests.Inc()
influxutils.WriteDatabaseNames(w)
return true
case "/datadog/api/v1/series":
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(r); err != nil {
datadogWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
// See https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/api/v1/validate":
datadogValidateRequests.Inc()
// See https://docs.datadoghq.com/api/latest/authentication/#validate-api-key
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{"valid":true}`)
return true
case "/datadog/api/v1/check_run":
datadogCheckRunRequests.Inc()
// See https://docs.datadoghq.com/api/latest/service-checks/#submit-a-service-check
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/intake/":
datadogIntakeRequests.Inc()
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, `{}`)
return true
case "/prometheus/targets", "/targets":
promscrapeTargetsRequests.Inc()
promscrape.WriteHumanReadableTargetsStatus(w, r)
@@ -165,6 +197,19 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
state := r.FormValue("state")
promscrape.WriteAPIV1Targets(w, state)
return true
case "/prometheus/config", "/config":
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
return true
}
promscrapeConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
promscrape.WriteConfigData(w)
return true
case "/prometheus/-/reload", "/-/reload":
promscrapeConfigReloadRequests.Inc()
procutil.SelfSIGHUP()
@@ -204,14 +249,23 @@ var (
nativeimportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/import/native", protocol="nativeimport"}`)
nativeimportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/import/native", protocol="nativeimport"}`)
influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/influx/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/influx/write", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/influx/query", protocol="influx"}`)
datadogWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/datadog/api/v1/series", protocol="datadog"}`)
datadogValidateRequests = metrics.NewCounter(`vm_http_requests_total{path="/datadog/api/v1/validate", protocol="datadog"}`)
datadogCheckRunRequests = metrics.NewCounter(`vm_http_requests_total{path="/datadog/api/v1/check_run", protocol="datadog"}`)
datadogIntakeRequests = metrics.NewCounter(`vm_http_requests_total{path="/datadog/intake/", protocol="datadog"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/targets"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/targets"}`)
promscrapeConfigRequests = metrics.NewCounter(`vm_http_requests_total{path="/config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vm_http_requests_total{path="/-/reload"}`)
_ = metrics.NewGauge(`vm_metrics_with_dropped_labels_total`, func() float64 {

View File

@@ -25,7 +25,7 @@ func InsertHandler(req *http.Request) error {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(tss []prompb.TimeSeries) error {
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
return insertRows(tss, extraLabels)
})
})

View File

@@ -16,7 +16,7 @@ import (
var (
relabelConfig = flag.String("relabelConfig", "", "Optional path to a file with relabeling rules, which are applied to all the ingested metrics. "+
"See https://docs.victoriametrics.com/#relabeling for details")
"See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal")
relabelDebug = flag.Bool("relabelDebug", false, "Whether to log metrics before and after relabeling with -relabelConfig. If the -relabelDebug is enabled, "+
"then the metrics aren't sent to storage. This is useful for debugging the relabeling configs")
)

View File

@@ -30,7 +30,8 @@ func InsertHandler(req *http.Request) error {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
return insertRows(rows, extraLabels)
})
})

View File

@@ -12,7 +12,7 @@ when restarting `vmrestore` with the same args.
VictoriaMetrics must be stopped during the restore process.
```
vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/restore>
vmrestore -src=gs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/restore>
```
@@ -116,7 +116,7 @@ i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/q
-skipBackupCompleteCheck
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
-src string
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
Source path with backup on the remote storage. Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-storageDataPath string
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
-version

View File

@@ -16,7 +16,7 @@ import (
var (
src = flag.String("src", "", "Source path with backup on the remote storage. "+
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
"Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir "+
"is synchronized with -src contents, i.e. it works like 'rsync --delete'")

View File

@@ -85,12 +85,6 @@ var vmuiFileServer = http.FileServer(http.FS(vmuiFiles))
// RequestHandler handles remote read API requests
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
// vmui access.
if strings.HasPrefix(r.URL.Path, "/vmui") {
vmuiFileServer.ServeHTTP(w, r)
return true
}
startTime := time.Now()
defer requestDuration.UpdateDuration(startTime)
@@ -153,11 +147,32 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
//
// See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
switch {
case strings.HasPrefix(path, "/prometheus"):
case strings.HasPrefix(path, "/prometheus/"):
path = path[len("/prometheus"):]
case strings.HasPrefix(path, "/graphite"):
case strings.HasPrefix(path, "/graphite/"):
path = path[len("/graphite"):]
}
// vmui access.
if strings.HasPrefix(path, "/vmui") {
r.URL.Path = path
vmuiFileServer.ServeHTTP(w, r)
return true
}
if path == "/graph" {
// Redirect to /graph/, otherwise vmui redirects to /vmui/, which can be inaccessible in user env.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
// The ParseForm error is ignored; whatever ends up in r.Form is re-encoded into the redirect URL.
_ = r.ParseForm()
newURL := "graph/?" + r.Form.Encode()
http.Redirect(w, r, newURL, http.StatusFound)
return true
}
if strings.HasPrefix(path, "/graph/") {
// This is needed for serving /graph URLs from Prometheus datasource in Grafana.
r.URL.Path = strings.Replace(path, "/graph/", "/vmui/", 1)
vmuiFileServer.ServeHTTP(w, r)
return true
}
if strings.HasPrefix(path, "/api/v1/label/") {
s := path[len("/api/v1/label/"):]
@@ -183,7 +198,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
return true
}
if strings.HasPrefix(path, "/functions") {
graphiteFunctionsRequests.Inc()
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, "%s", `{}`)
return true
}
switch path {
case "/api/v1/query":
queryRequests.Inc()
@@ -529,6 +549,8 @@ var (
graphiteTagsDelSeriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/tags/delSeries"}`)
graphiteTagsDelSeriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/tags/delSeries"}`)
// Uses the same `vm_http_requests_total` metric name as the other request counters.
graphiteFunctionsRequests = metrics.NewCounter(`vm_http_requests_total{path="/functions"}`)
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)

View File

@@ -41,17 +41,12 @@ type Result struct {
// Values are sorted by Timestamps.
Values []float64
Timestamps []int64
// Marshaled MetricName. Used only for results sorting
// in app/vmselect/promql
MetricNameMarshaled []byte
}
func (r *Result) reset() {
r.MetricName.Reset()
r.Values = r.Values[:0]
r.Timestamps = r.Timestamps[:0]
r.MetricNameMarshaled = r.MetricNameMarshaled[:0]
}
// Results holds results returned from ProcessSearchQuery.
@@ -1140,7 +1135,6 @@ func setupTfss(tr storage.TimeRange, tagFilterss [][]storage.TagFilter, deadline
}
}
tfss = append(tfss, tfs)
tfss = append(tfss, tfs.Finalize()...)
}
return tfss, nil
}

View File

@@ -49,6 +49,8 @@ const defaultStep = 5 * 60 * 1000
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer federateDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %w", err)
@@ -96,12 +98,11 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request
return err
})
if err != nil {
return fmt.Errorf("error during data fetching: %w", err)
return fmt.Errorf("error during sending data to remote client: %w", err)
}
if err := bw.Flush(); err != nil {
return err
}
federateDuration.UpdateDuration(startTime)
return nil
}
@@ -109,6 +110,8 @@ var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/fe
// ExportCSVHandler exports data in CSV format from /api/v1/export/csv
func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer exportCSVDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %w", err)
@@ -172,9 +175,8 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
}
err = <-doneCh
if err != nil {
return fmt.Errorf("error during exporting data to csv: %w", err)
return fmt.Errorf("error during sending the exported csv data to remote client: %w", err)
}
exportCSVDuration.UpdateDuration(startTime)
return nil
}
@@ -182,6 +184,8 @@ var exportCSVDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/a
// ExportNativeHandler exports data in native format from /api/v1/export/native.
func ExportNativeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer exportNativeDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %w", err)
@@ -240,12 +244,11 @@ func ExportNativeHandler(startTime time.Time, w http.ResponseWriter, r *http.Req
return err
})
if err != nil {
return err
return fmt.Errorf("error during sending native data to remote client: %w", err)
}
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("error during flushing native data to remote client: %w", err)
}
exportNativeDuration.UpdateDuration(startTime)
return nil
}
@@ -255,6 +258,8 @@ var bbPool bytesutil.ByteBufferPool
// ExportHandler exports data in raw format from /api/v1/export.
func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer exportDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %w", err)
@@ -285,7 +290,6 @@ func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
if err := exportHandler(w, matches, etf, start, end, format, maxRowsPerLine, reduceMemUsage, deadline); err != nil {
return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %w", matches, start, end, err)
}
exportDuration.UpdateDuration(startTime)
return nil
}
@@ -410,7 +414,7 @@ func exportHandler(w http.ResponseWriter, matches []string, etf []storage.TagFil
}
err = <-doneCh
if err != nil {
return fmt.Errorf("error during data fetching: %w", err)
return fmt.Errorf("error during sending the data to remote client: %w", err)
}
return nil
}
@@ -437,6 +441,8 @@ var exportBlockPool = &sync.Pool{
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
func DeleteHandler(startTime time.Time, r *http.Request) error {
defer deleteDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForQuery(r, startTime)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %w", err)
@@ -457,7 +463,6 @@ func DeleteHandler(startTime time.Time, r *http.Request) error {
if deletedCount > 0 {
promql.ResetRollupResultCache()
}
deleteDuration.UpdateDuration(startTime)
return nil
}
@@ -467,6 +472,8 @@ var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWriter, r *http.Request) error {
defer labelValuesDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForQuery(r, startTime)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %w", err)
@@ -531,9 +538,8 @@ func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWr
defer bufferedwriter.Put(bw)
WriteLabelValuesResponse(bw, labelValues)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot flush label values to remote client: %w", err)
}
labelValuesDuration.UpdateDuration(startTime)
return nil
}
@@ -594,7 +600,7 @@ func labelValuesWithMatches(labelName string, matches []string, etf []storage.Ta
return nil
})
if err != nil {
return nil, fmt.Errorf("error when data fetching: %w", err)
return nil, fmt.Errorf("cannot fetch label values from storage: %w", err)
}
}
labelValues := make([]string, 0, len(m))
@@ -609,6 +615,8 @@ var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// LabelsCountHandler processes /api/v1/labels/count request.
func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer labelsCountDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForStatusRequest(r, startTime)
labelEntries, err := netstorage.GetLabelEntries(deadline)
if err != nil {
@@ -619,9 +627,8 @@ func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
defer bufferedwriter.Put(bw)
WriteLabelsCountResponse(bw, labelEntries)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send labels count response to remote client: %w", err)
}
labelsCountDuration.UpdateDuration(startTime)
return nil
}
@@ -635,6 +642,8 @@ const secsPerDay = 3600 * 24
//
// It can accept `match[]` filters in order to narrow down the search.
func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer tsdbStatusDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForStatusRequest(r, startTime)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %w", err)
@@ -686,9 +695,8 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
defer bufferedwriter.Put(bw)
WriteTSDBStatusResponse(bw, status)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send tsdb status response to remote client: %w", err)
}
tsdbStatusDuration.UpdateDuration(startTime)
return nil
}
@@ -717,6 +725,8 @@ var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer labelsDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForQuery(r, startTime)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %w", err)
@@ -779,9 +789,8 @@ func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
defer bufferedwriter.Put(bw)
WriteLabelsResponse(bw, labels)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send labels response to remote client: %w", err)
}
labelsDuration.UpdateDuration(startTime)
return nil
}
@@ -829,7 +838,7 @@ func labelsWithMatches(matches []string, etf []storage.TagFilter, start, end int
return nil
})
if err != nil {
return nil, fmt.Errorf("error when data fetching: %w", err)
return nil, fmt.Errorf("cannot fetch labels from storage: %w", err)
}
}
labels := make([]string, 0, len(m))
@@ -844,6 +853,8 @@ var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
// SeriesCountHandler processes /api/v1/series/count request.
func SeriesCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer seriesCountDuration.UpdateDuration(startTime)
deadline := searchutils.GetDeadlineForStatusRequest(r, startTime)
n, err := netstorage.GetSeriesCount(deadline)
if err != nil {
@@ -854,9 +865,8 @@ func SeriesCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
defer bufferedwriter.Put(bw)
WriteSeriesCountResponse(bw, n)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send series count response to remote client: %w", err)
}
seriesCountDuration.UpdateDuration(startTime)
return nil
}
@@ -866,6 +876,8 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer seriesDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %w", err)
@@ -945,13 +957,12 @@ func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
// WriteSeriesResponse must consume all the data from resultsCh.
WriteSeriesResponse(bw, resultsCh)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot flush series response to remote client: %w", err)
}
err = <-doneCh
if err != nil {
return fmt.Errorf("error during data fetching: %w", err)
return fmt.Errorf("cannot send series response to remote client: %w", err)
}
seriesDuration.UpdateDuration(startTime)
return nil
}
@@ -961,6 +972,8 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer queryDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
query := r.FormValue("query")
if len(query) == 0 {
@@ -1062,9 +1075,8 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
defer bufferedwriter.Put(bw)
WriteQueryResponse(bw, result)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot flush query response to remote client: %w", err)
}
queryDuration.UpdateDuration(startTime)
return nil
}
@@ -1074,6 +1086,8 @@ var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
func QueryRangeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer queryRangeDuration.UpdateDuration(startTime)
ct := startTime.UnixNano() / 1e6
query := r.FormValue("query")
if len(query) == 0 {
@@ -1098,7 +1112,6 @@ func QueryRangeHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
if err := queryRangeHandler(startTime, w, query, start, end, step, r, ct, etf); err != nil {
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", query, start, end, step, err)
}
queryRangeDuration.UpdateDuration(startTime)
return nil
}
@@ -1155,7 +1168,7 @@ func queryRangeHandler(startTime time.Time, w http.ResponseWriter, query string,
defer bufferedwriter.Put(bw)
WriteQueryRangeResponse(bw, result)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send query range response to remote client: %w", err)
}
return nil
}
@@ -1311,6 +1324,8 @@ func getLatencyOffsetMilliseconds() int64 {
// QueryStatsHandler returns query stats at `/api/v1/status/top_queries`
func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer queryStatsDuration.UpdateDuration(startTime)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %w", err)
}
@@ -1333,9 +1348,8 @@ func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
defer bufferedwriter.Put(bw)
querystats.WriteJSONQueryStats(bw, topN, maxLifetime)
if err := bw.Flush(); err != nil {
return err
return fmt.Errorf("cannot send query stats response to client: %w", err)
}
queryStatsDuration.UpdateDuration(startTime)
return nil
}

View File

@@ -11,7 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
"github.com/valyala/histogram"
xxhash "github.com/cespare/xxhash/v2"
)
var aggrFuncs = map[string]aggrFunc{
@@ -32,6 +32,7 @@ var aggrFuncs = map[string]aggrFunc{
// PromQL extension funcs
"median": aggrFuncMedian,
"limitk": aggrFuncLimitK,
"limit_offset": aggrFuncLimitOffset,
"distinct": newAggrFunc(aggrFuncDistinct),
"sum2": newAggrFunc(aggrFuncSum2),
"geomean": newAggrFunc(aggrFuncGeomean),
@@ -40,14 +41,19 @@ var aggrFuncs = map[string]aggrFunc{
"topk_max": newAggrFuncRangeTopK(maxValue, false),
"topk_avg": newAggrFuncRangeTopK(avgValue, false),
"topk_median": newAggrFuncRangeTopK(medianValue, false),
"topk_last": newAggrFuncRangeTopK(lastValue, false),
"bottomk_min": newAggrFuncRangeTopK(minValue, true),
"bottomk_max": newAggrFuncRangeTopK(maxValue, true),
"bottomk_avg": newAggrFuncRangeTopK(avgValue, true),
"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
"bottomk_last": newAggrFuncRangeTopK(lastValue, true),
"any": aggrFuncAny,
"mad": newAggrFunc(aggrFuncMAD),
"outliers_mad": aggrFuncOutliersMAD,
"outliersk": aggrFuncOutliersK,
"mode": newAggrFunc(aggrFuncMode),
"zscore": aggrFuncZScore,
"quantiles": aggrFuncQuantiles,
}
type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
@@ -484,7 +490,6 @@ func aggrFuncZScore(afa *aggrFuncArg) ([]*timeseries, error) {
ts.Values[i] = (v - avg) / stddev
}
}
// Remove MetricGroup from all the tss.
for _, ts := range tss {
ts.MetricName.ResetMetricGroup()
@@ -575,7 +580,7 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
var dst timeseries
dst.CopyFromShallowTimestamps(tss[0])
dst.MetricName.RemoveTag(dstLabel)
dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'g', -1, 64))
dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'f', -1, 64))
for i := range dst.Values {
count := 0
for _, ts := range tss {
@@ -799,15 +804,127 @@ func avgValue(values []float64) float64 {
}
func medianValue(values []float64) float64 {
h := histogram.GetFast()
for _, v := range values {
if !math.IsNaN(v) {
h.Update(v)
}
return quantile(0.5, values)
}
func lastValue(values []float64) float64 {
values = skipTrailingNaNs(values)
if len(values) == 0 {
return nan
}
value := h.Quantile(0.5)
histogram.PutFast(h)
return value
return values[len(values)-1]
}
// quantiles computes the quantile for every phi in phis over originValues,
// appends the results to qs in the order of phis and returns the extended qs.
//
// originValues is never modified: its non-NaN items are copied into a scratch
// buffer obtained from getFloat64s and sorted there.
func quantiles(qs, phis []float64, originValues []float64) []float64 {
	scratch := getFloat64s()
	sorted := prepareForQuantileFloat64(scratch.A[:0], originValues)
	// Store the (possibly re-allocated) slice back before releasing the buffer.
	scratch.A = sorted
	qs = quantilesSorted(qs, phis, sorted)
	putFloat64s(scratch)
	return qs
}
// quantile returns the phi-quantile over originValues.
//
// originValues is never modified: its non-NaN items are copied into a scratch
// buffer obtained from getFloat64s and sorted there.
func quantile(phi float64, originValues []float64) float64 {
	scratch := getFloat64s()
	sorted := prepareForQuantileFloat64(scratch.A[:0], originValues)
	// Store the (possibly re-allocated) slice back before releasing the buffer.
	scratch.A = sorted
	result := quantileSorted(phi, sorted)
	putFloat64s(scratch)
	return result
}
// prepareForQuantileFloat64 copies items from src to dst but removes NaNs and sorts the dst
func prepareForQuantileFloat64(dst, src []float64) []float64 {
for _, v := range src {
if math.IsNaN(v) {
continue
}
dst = append(dst, v)
}
sort.Float64s(dst)
return dst
}
// quantilesSorted computes the quantile for every phi in phis over values,
// appends the results to qs in the order of phis and returns the extended qs.
//
// values must be sorted in ascending order and must not contain NaN items.
// The calculation is delegated to quantileSorted, which mimics Prometheus
// for compatibility's sake.
func quantilesSorted(qs, phis []float64, values []float64) []float64 {
	for i := range phis {
		qs = append(qs, quantileSorted(phis[i], values))
	}
	return qs
}
// quantileSorted returns the phi-quantile over values.
//
// values must be sorted in ascending order and must not contain NaN items.
// The implementation mimics Prometheus implementation for compatibility's sake:
//
//   - nan is returned for empty values or NaN phi;
//   - -Inf is returned for phi < 0 and +Inf for phi > 1;
//   - otherwise the result is linearly interpolated between the two items
//     surrounding the fractional rank phi*(len(values)-1).
func quantileSorted(phi float64, values []float64) float64 {
	switch {
	case len(values) == 0 || math.IsNaN(phi):
		return nan
	case phi < 0:
		return math.Inf(-1)
	case phi > 1:
		return math.Inf(+1)
	}

	n := float64(len(values))
	rank := phi * (n - 1)
	rankFloor := math.Floor(rank)

	// Clamp both indexes to the valid range of values.
	lo := math.Max(0, rankFloor)
	hi := math.Min(n-1, lo+1)

	// frac is the interpolation weight of the upper item.
	frac := rank - rankFloor
	return values[int(lo)]*(1-frac) + values[int(hi)]*frac
}
// aggrFuncMAD collapses tss into a single series holding the per-point
// median absolute deviation across all the series in tss.
//
// The result re-uses tss[0]: its Values are overwritten in place.
// See https://en.wikipedia.org/wiki/Median_absolute_deviation
func aggrFuncMAD(tss []*timeseries) []*timeseries {
	// Per-point medians across tss serve as the reference for the deviations.
	perPointMedians := getPerPointMedians(tss)
	perPointMADs := getPerPointMADs(tss, perPointMedians)

	first := tss[0]
	first.Values = append(first.Values[:0], perPointMADs...)
	return tss[:1]
}
// aggrFuncOutliersMAD implements the `outliers_mad(tolerance, q)` aggregate function.
//
// For every group it keeps only the series which have at least one point whose
// absolute deviation from the per-point median exceeds the per-point MAD
// multiplied by tolerance.
// See https://en.wikipedia.org/wiki/Median_absolute_deviation
func aggrFuncOutliersMAD(afa *aggrFuncArg) ([]*timeseries, error) {
	args := afa.args
	if err := expectTransformArgsNum(args, 2); err != nil {
		return nil, err
	}
	tolerances, err := getScalar(args[0], 0)
	if err != nil {
		return nil, err
	}
	afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
		// Per-point medians across tss.
		medians := getPerPointMedians(tss)

		// Per-point thresholds: MAD multiplied by the tolerance for the given point.
		thresholds := getPerPointMADs(tss, medians)
		for i := range thresholds {
			thresholds[i] *= tolerances[i]
		}

		// Filter tss in place, keeping series with at least a single point above the threshold.
		kept := tss[:0]
		for _, ts := range tss {
			for i, v := range ts.Values {
				if math.Abs(v-medians[i]) > thresholds[i] {
					kept = append(kept, ts)
					break
				}
			}
		}
		return kept
	}
	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
}
func aggrFuncOutliersK(afa *aggrFuncArg) ([]*timeseries, error) {
@@ -821,20 +938,7 @@ func aggrFuncOutliersK(afa *aggrFuncArg) ([]*timeseries, error) {
}
afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
// Calculate medians for each point across tss.
medians := make([]float64, len(ks))
h := histogram.GetFast()
for n := range ks {
h.Reset()
for j := range tss {
v := tss[j].Values[n]
if !math.IsNaN(v) {
h.Update(v)
}
}
medians[n] = h.Quantile(0.5)
}
histogram.PutFast(h)
medians := getPerPointMedians(tss)
// Return topK time series with the highest variance from median.
f := func(values []float64) float64 {
sum2 := float64(0)
@@ -849,38 +953,187 @@ func aggrFuncOutliersK(afa *aggrFuncArg) ([]*timeseries, error) {
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
}
// getPerPointMedians returns the median across all the series in tss
// for every point index of tss[0].Values. NaN values are ignored.
//
// It panics via logger.Panicf if tss is empty.
func getPerPointMedians(tss []*timeseries) []float64 {
	if len(tss) == 0 {
		logger.Panicf("BUG: expecting non-empty tss")
	}
	medians := make([]float64, len(tss[0].Values))
	buf := getFloat64s()
	column := buf.A
	for pointIdx := range medians {
		// Collect non-NaN values of all the series at pointIdx.
		column = column[:0]
		for _, ts := range tss {
			if v := ts.Values[pointIdx]; !math.IsNaN(v) {
				column = append(column, v)
			}
		}
		medians[pointIdx] = quantile(0.5, column)
	}
	// Store the (possibly re-allocated) slice back before releasing the buffer.
	buf.A = column
	putFloat64s(buf)
	return medians
}
// getPerPointMADs returns the median absolute deviation across all the series
// in tss for every point index of medians.
//
// medians[n] must hold the median across tss at point n. NaN values are ignored.
// See https://en.wikipedia.org/wiki/Median_absolute_deviation
func getPerPointMADs(tss []*timeseries, medians []float64) []float64 {
	mads := make([]float64, len(medians))
	buf := getFloat64s()
	deviations := buf.A
	for pointIdx, median := range medians {
		// Collect absolute deviations from the median for non-NaN values at pointIdx.
		deviations = deviations[:0]
		for _, ts := range tss {
			v := ts.Values[pointIdx]
			if math.IsNaN(v) {
				continue
			}
			deviations = append(deviations, math.Abs(v-median))
		}
		mads[pointIdx] = quantile(0.5, deviations)
	}
	// Store the (possibly re-allocated) slice back before releasing the buffer.
	buf.A = deviations
	putFloat64s(buf)
	return mads
}
func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 2); err != nil {
return nil, err
}
ks, err := getScalar(args[0], 0)
limits, err := getScalar(args[0], 0)
if err != nil {
return nil, fmt.Errorf("cannot obtain limit arg: %w", err)
}
limit := 0
if len(limits) > 0 {
limit = int(limits[0])
}
afe := newLimitOffsetAggrFunc(limit, 0)
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
}
func aggrFuncLimitOffset(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 3); err != nil {
return nil, err
}
maxK := 0
for _, kf := range ks {
k := int(kf)
if k > maxK {
maxK = k
}
limit, err := getIntNumber(args[0], 0)
if err != nil {
return nil, fmt.Errorf("cannot obtain limit arg: %w", err)
}
afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
if len(tss) > maxK {
tss = tss[:maxK]
offset, err := getIntNumber(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot obtain offset arg: %w", err)
}
afe := newLimitOffsetAggrFunc(limit, offset)
return aggrFuncExt(afe, args[2], &afa.ae.Modifier, afa.ae.Limit, true)
}
func newLimitOffsetAggrFunc(limit, offset int) func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
if offset < 0 {
offset = 0
}
if limit < 0 {
limit = 0
}
return func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
// Sort series by metricName hash in order to get consistent set of output series
// across multiple calls to limitk() and limit_offset() functions.
// Sort series by hash in order to guarantee uniform selection across series.
type hashSeries struct {
h uint64
ts *timeseries
}
for i, kf := range ks {
k := int(kf)
if k < 0 {
k = 0
}
for j := k; j < len(tss); j++ {
tss[j].Values[i] = nan
hss := make([]hashSeries, len(tss))
d := xxhash.New()
for i, ts := range tss {
h := getHash(d, &ts.MetricName)
hss[i] = hashSeries{
h: h,
ts: ts,
}
}
sort.Slice(hss, func(i, j int) bool {
return hss[i].h < hss[j].h
})
for i, hs := range hss {
tss[i] = hs.ts
}
if offset > len(tss) {
return nil
}
tss = tss[offset:]
if limit < len(tss) {
tss = tss[:limit]
}
return tss
}
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
}
// getHash returns the hash of mn calculated with d.
//
// d is reset before use, then the metric group followed by the key and the value
// of every tag are written into it in the order they are stored in mn.Tags.
func getHash(d *xxhash.Digest, mn *storage.MetricName) uint64 {
	d.Reset()
	_, _ = d.Write(mn.MetricGroup)
	tags := mn.Tags
	for i := range tags {
		_, _ = d.Write(tags[i].Key)
		_, _ = d.Write(tags[i].Value)
	}
	return d.Sum64()
}
// aggrFuncQuantiles implements the `quantiles("dstLabel", phi1, ..., phiN, q)` aggregate function.
//
// The first arg is the name of the label to put the phi value into, the last arg
// is the expression to aggregate and all the args in between are phi values.
// For every group it returns a distinct series per each phi, labeled with dstLabel=phi
// and holding the per-point phi-quantile across the series in the group.
//
// An error is returned if less than 3 args are passed or if dstLabel or any phi
// cannot be obtained.
func aggrFuncQuantiles(afa *aggrFuncArg) ([]*timeseries, error) {
	args := afa.args
	if len(args) < 3 {
		return nil, fmt.Errorf("unexpected number of args: %d; expecting at least 3 args", len(args))
	}
	dstLabel, err := getString(args[0], 0)
	if err != nil {
		return nil, fmt.Errorf("cannot obtain dstLabel: %w", err)
	}
	phiArgs := args[1 : len(args)-1]
	phis := make([]float64, len(phiArgs))
	for i, phiArg := range phiArgs {
		phisLocal, err := getScalar(phiArg, i+1)
		if err != nil {
			return nil, err
		}
		// Verify the slice which is indexed below. phis is pre-allocated to a non-zero length,
		// so checking it would never catch an empty scalar result.
		if len(phisLocal) == 0 {
			logger.Panicf("BUG: expecting at least a single sample")
		}
		phis[i] = phisLocal[0]
	}
	argOrig := args[len(args)-1]
	afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
		// Prepare a destination series per each phi. Its labels are copied from tss[0],
		// with dstLabel replaced by the phi value.
		tssDst := make([]*timeseries, len(phiArgs))
		for j := range tssDst {
			ts := &timeseries{}
			ts.CopyFromShallowTimestamps(tss[0])
			ts.MetricName.RemoveTag(dstLabel)
			ts.MetricName.AddTag(dstLabel, fmt.Sprintf("%g", phis[j]))
			tssDst[j] = ts
		}

		b := getFloat64s()
		qs := b.A
		a := getFloat64s()
		values := a.A
		for n := range tss[0].Values {
			// Collect values of all the series at point n; NaNs are dropped inside quantiles().
			values = values[:0]
			for j := range tss {
				values = append(values, tss[j].Values[n])
			}
			qs = quantiles(qs[:0], phis, values)
			for j := range tssDst {
				tssDst[j].Values[n] = qs[j]
			}
		}
		// Store the (possibly re-allocated) slices back before releasing the buffers.
		a.A = values
		putFloat64s(a)
		b.A = qs
		putFloat64s(b)
		return tssDst
	}
	return aggrFuncExt(afe, argOrig, &afa.ae.Modifier, afa.ae.Limit, false)
}
func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
@@ -909,19 +1162,17 @@ func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
func newAggrQuantileFunc(phis []float64) func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
return func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
dst := tss[0]
h := histogram.GetFast()
defer histogram.PutFast(h)
a := getFloat64s()
values := a.A
for n := range dst.Values {
h.Reset()
values = values[:0]
for j := range tss {
v := tss[j].Values[n]
if !math.IsNaN(v) {
h.Update(v)
}
values = append(values, tss[j].Values[n])
}
phi := phis[n]
dst.Values[n] = h.Quantile(phi)
dst.Values[n] = quantile(phis[n], values)
}
a.A = values
putFloat64s(a)
tss[0] = dst
return tss[:1]
}

View File

@@ -19,6 +19,9 @@ var binaryOpFuncs = map[string]binaryOpFunc{
"%": newBinaryOpArithFunc(binaryop.Mod),
"^": newBinaryOpArithFunc(binaryop.Pow),
// See https://github.com/prometheus/prometheus/pull/9248
"atan2": newBinaryOpArithFunc(binaryop.Atan2),
// cmp ops
"==": newBinaryOpCmpFunc(binaryop.Eq),
"!=": newBinaryOpCmpFunc(binaryop.Neq),
@@ -59,12 +62,12 @@ func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc {
}
return nan
}
if cf(left, right) {
return 1
}
if math.IsNaN(left) {
return nan
}
if cf(left, right) {
return 1
}
return 0
}
return newBinaryOpFunc(cfe)
@@ -303,12 +306,18 @@ func binaryOpAnd(bfa *binaryOpFuncArg) ([]*timeseries, error) {
if tssLeft == nil {
continue
}
// Add gaps to tssLeft if there are gaps at valuesRight.
valuesRight := tssRight[0].Values
// Add gaps to tssLeft if there are gaps at tssRight.
for _, tsLeft := range tssLeft {
valuesLeft := tsLeft.Values
for i, v := range valuesRight {
if math.IsNaN(v) {
for i := range valuesLeft {
hasValue := false
for _, tsRight := range tssRight {
if !math.IsNaN(tsRight.Values[i]) {
hasValue = true
break
}
}
if !hasValue {
valuesLeft[i] = nan
}
}
@@ -333,12 +342,18 @@ func binaryOpOr(bfa *binaryOpFuncArg) ([]*timeseries, error) {
}
// Fill gaps in tssLeft with values from tssRight as Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/552
valuesRight := tssRight[0].Values
for _, tsLeft := range tssLeft {
valuesLeft := tsLeft.Values
for i, v := range valuesLeft {
if math.IsNaN(v) {
valuesLeft[i] = valuesRight[i]
if !math.IsNaN(v) {
continue
}
for _, tsRight := range tssRight {
vRight := tsRight.Values[i]
if !math.IsNaN(vRight) {
valuesLeft[i] = vRight
break
}
}
}
}
@@ -355,13 +370,15 @@ func binaryOpUnless(bfa *binaryOpFuncArg) ([]*timeseries, error) {
rvs = append(rvs, tssLeft...)
continue
}
// Add gaps to tssLeft if the are no gaps at valuesRight.
valuesRight := tssRight[0].Values
// Add gaps to tssLeft if there are no gaps at tssRight.
for _, tsLeft := range tssLeft {
valuesLeft := tsLeft.Values
for i, v := range valuesRight {
if !math.IsNaN(v) {
valuesLeft[i] = nan
for i := range valuesLeft {
for _, tsRight := range tssRight {
if !math.IsNaN(tsRight.Values[i]) {
valuesLeft[i] = nan
break
}
}
}
}

View File

@@ -4,6 +4,7 @@ import (
"flag"
"fmt"
"math"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
@@ -23,6 +24,7 @@ var (
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from /api/v1/query_range. "+
"This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points "+
"returned to graphing UI such as Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph")
noStaleMarkers = flag.Bool("search.noStaleMarkers", false, "Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets")
)
// The minimum number of points per timeseries for enabling time rounding.
@@ -389,7 +391,7 @@ func tryGetArgRollupFuncWithMetricExpr(ae *metricsql.AggrFuncExpr) (*metricsql.F
if nrf == nil {
return nil, nil
}
rollupArgIdx := getRollupArgIdx(fe.Name)
rollupArgIdx := getRollupArgIdx(fe)
if rollupArgIdx >= len(fe.Args) {
// Incorrect number of args for rollup func.
return nil, nil
@@ -429,7 +431,7 @@ func evalExprs(ec *EvalConfig, es []metricsql.Expr) ([][]*timeseries, error) {
func evalRollupFuncArgs(ec *EvalConfig, fe *metricsql.FuncExpr) ([]interface{}, *metricsql.RollupExpr, error) {
var re *metricsql.RollupExpr
rollupArgIdx := getRollupArgIdx(fe.Name)
rollupArgIdx := getRollupArgIdx(fe)
if len(fe.Args) <= rollupArgIdx {
return nil, nil, fmt.Errorf("expecting at least %d args to %q; got %d args; expr: %q", rollupArgIdx+1, fe.Name, len(fe.Args), fe.AppendString(nil))
}
@@ -477,7 +479,8 @@ func getRollupExprArg(arg metricsql.Expr) *metricsql.RollupExpr {
return &reNew
}
func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
func evalRollupFunc(ec *EvalConfig, funcName string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
funcName = strings.ToLower(funcName)
ecNew := ec
var offset int64
if re.Offset != nil {
@@ -490,7 +493,7 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.E
// so cache hit rate should be quite good.
// See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/976
}
if name == "rollup_candlestick" {
if funcName == "rollup_candlestick" {
// Automatically apply `offset -step` to `rollup_candlestick` function
// in order to obtain expected OHLC results.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309#issuecomment-582113462
@@ -503,12 +506,12 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.E
var rvs []*timeseries
var err error
if me, ok := re.Expr.(*metricsql.MetricExpr); ok {
rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, expr, me, iafc, re.Window)
rvs, err = evalRollupFuncWithMetricExpr(ecNew, funcName, rf, expr, me, iafc, re.Window)
} else {
if iafc != nil {
logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", name, re.AppendString(nil))
logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", funcName, re.AppendString(nil))
}
rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, expr, re)
rvs, err = evalRollupFuncWithSubquery(ecNew, funcName, rf, expr, re)
}
if err != nil {
return nil, err
@@ -527,7 +530,7 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.E
return rvs, nil
}
func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr) ([]*timeseries, error) {
func evalRollupFuncWithSubquery(ec *EvalConfig, funcName string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr) ([]*timeseries, error) {
// TODO: determine whether to use rollupResultCacheV here.
step := re.Step.Duration(ec.Step)
if step == 0 {
@@ -549,25 +552,24 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, expr
return nil, err
}
if len(tssSQ) == 0 {
if name == "absent_over_time" {
if funcName == "absent_over_time" {
tss := evalNumber(ec, 1)
return tss, nil
}
return nil, nil
}
sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step)
preFunc, rcs, err := getRollupConfigs(name, rf, expr, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
if err != nil {
return nil, err
}
tss := make([]*timeseries, 0, len(tssSQ)*len(rcs))
var tssLock sync.Mutex
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
doParallel(tssSQ, func(tsSQ *timeseries, values []float64, timestamps []int64) ([]float64, []int64) {
values, timestamps = removeNanValues(values[:0], timestamps[:0], tsSQ.Values, tsSQ.Timestamps)
preFunc(values, timestamps)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &tsSQ.MetricName); tsm != nil {
if tsm := newTimeseriesMap(funcName, sharedTimestamps, &tsSQ.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, values, timestamps)
tssLock.Lock()
tss = tsm.AppendTimeseriesTo(tss)
@@ -575,7 +577,7 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, expr
continue
}
var ts timeseries
doRollupForTimeseries(rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps, removeMetricGroup)
doRollupForTimeseries(funcName, rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps)
tssLock.Lock()
tss = append(tss, &ts)
tssLock.Unlock()
@@ -641,7 +643,7 @@ var (
rollupResultCacheMiss = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
)
func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
func evalRollupFuncWithMetricExpr(ec *EvalConfig, funcName string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowExpr *metricsql.DurationExpr) ([]*timeseries, error) {
if me.IsEmpty() {
return evalNumber(ec, nan), nil
@@ -664,7 +666,7 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
// Obtain rollup configs before fetching data from db,
// so type errors can be caught earlier.
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
preFunc, rcs, err := getRollupConfigs(name, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
if err != nil {
return nil, err
}
@@ -688,7 +690,7 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
if rssLen == 0 {
rss.Cancel()
var tss []*timeseries
if name == "absent_over_time" {
if funcName == "absent_over_time" {
tss = getAbsentTimeseries(ec, me)
}
// Add missing points until ec.End.
@@ -734,12 +736,11 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
defer rml.Put(uint64(rollupMemorySize))
// Evaluate rollup
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
var tss []*timeseries
if iafc != nil {
tss, err = evalRollupWithIncrementalAggregate(name, iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
tss, err = evalRollupWithIncrementalAggregate(funcName, iafc, rss, rcs, preFunc, sharedTimestamps)
} else {
tss, err = evalRollupNoIncrementalAggregate(name, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
tss, err = evalRollupNoIncrementalAggregate(funcName, rss, rcs, preFunc, sharedTimestamps)
}
if err != nil {
return nil, err
@@ -761,18 +762,15 @@ func getRollupMemoryLimiter() *memoryLimiter {
return &rollupMemoryLimiter
}
func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
func evalRollupWithIncrementalAggregate(funcName string, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
if name != "default_rollup" {
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.
rs.Values, rs.Timestamps = dropStaleNaNs(rs.Values, rs.Timestamps)
}
rs.Values, rs.Timestamps = dropStaleNaNs(funcName, rs.Values, rs.Timestamps)
preFunc(rs.Values, rs.Timestamps)
ts := getTimeseries()
defer putTimeseries(ts)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
if tsm := newTimeseriesMap(funcName, sharedTimestamps, &rs.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
for _, ts := range tsm.m {
iafc.updateTimeseries(ts, workerID)
@@ -780,7 +778,7 @@ func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncCo
continue
}
ts.Reset()
doRollupForTimeseries(rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
doRollupForTimeseries(funcName, rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps)
iafc.updateTimeseries(ts, workerID)
// ts.Timestamps points to sharedTimestamps. Zero it, so it can be re-used.
@@ -796,18 +794,15 @@ func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncCo
return tss, nil
}
func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
func evalRollupNoIncrementalAggregate(funcName string, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) {
tss := make([]*timeseries, 0, rss.Len()*len(rcs))
var tssLock sync.Mutex
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
if name != "default_rollup" {
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.
rs.Values, rs.Timestamps = dropStaleNaNs(rs.Values, rs.Timestamps)
}
rs.Values, rs.Timestamps = dropStaleNaNs(funcName, rs.Values, rs.Timestamps)
preFunc(rs.Values, rs.Timestamps)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
if tsm := newTimeseriesMap(funcName, sharedTimestamps, &rs.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
tssLock.Lock()
tss = tsm.AppendTimeseriesTo(tss)
@@ -815,7 +810,7 @@ func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs
continue
}
var ts timeseries
doRollupForTimeseries(rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
doRollupForTimeseries(funcName, rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps)
tssLock.Lock()
tss = append(tss, &ts)
tssLock.Unlock()
@@ -828,13 +823,13 @@ func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs
return tss, nil
}
func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.MetricName, valuesSrc []float64, timestampsSrc []int64,
sharedTimestamps []int64, removeMetricGroup bool) {
func doRollupForTimeseries(funcName string, rc *rollupConfig, tsDst *timeseries, mnSrc *storage.MetricName, valuesSrc []float64, timestampsSrc []int64,
sharedTimestamps []int64) {
tsDst.MetricName.CopyFrom(mnSrc)
if len(rc.TagValue) > 0 {
tsDst.MetricName.AddTag("rollup", rc.TagValue)
}
if removeMetricGroup {
if !rollupFuncsKeepMetricGroup[funcName] {
tsDst.MetricName.ResetMetricGroup()
}
tsDst.Values = rc.Do(tsDst.Values[:0], valuesSrc, timestampsSrc)
@@ -901,7 +896,13 @@ func toTagFilter(dst *storage.TagFilter, src *metricsql.LabelFilter) {
dst.IsNegative = src.IsNegative
}
func dropStaleNaNs(values []float64, timestamps []int64) ([]float64, []int64) {
func dropStaleNaNs(funcName string, values []float64, timestamps []int64) ([]float64, []int64) {
if *noStaleMarkers || funcName == "default_rollup" {
// Do not drop Prometheus staleness marks (aka stale NaNs) for default_rollup() function,
// since it uses them for Prometheus-style staleness detection.
return values, timestamps
}
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.
hasStaleSamples := false
for _, v := range values {
if decimal.IsStaleNaN(v) {

View File

@@ -13,6 +13,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/querystats"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
)
@@ -80,8 +81,8 @@ func maySortResults(e metricsql.Expr, tss []*timeseries) bool {
case *metricsql.AggrFuncExpr:
switch strings.ToLower(v.Name) {
case "topk", "bottomk", "outliersk",
"topk_max", "topk_min", "topk_avg", "topk_median",
"bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median":
"topk_max", "topk_min", "topk_avg", "topk_median", "topk_last",
"bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median", "bottomk_last":
return false
}
}
@@ -101,7 +102,6 @@ func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, e
m[string(bb.B)] = struct{}{}
rs := &result[i]
rs.MetricNameMarshaled = append(rs.MetricNameMarshaled[:0], bb.B...)
rs.MetricName.CopyFrom(&ts.MetricName)
rs.Values = append(rs.Values[:0], ts.Values...)
rs.Timestamps = append(rs.Timestamps[:0], ts.Timestamps...)
@@ -110,13 +110,39 @@ func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, e
if maySort {
sort.Slice(result, func(i, j int) bool {
return string(result[i].MetricNameMarshaled) < string(result[j].MetricNameMarshaled)
return metricNameLess(&result[i].MetricName, &result[j].MetricName)
})
}
return result, nil
}
// metricNameLess reports whether a must be ordered before b.
//
// Metric groups are compared first. If they match, tags are compared pairwise
// by key and then by value. If all the shared tags are identical, the name
// with fewer tags is ordered first.
//
// Tags must be already sorted by the caller.
func metricNameLess(a, b *storage.MetricName) bool {
	if groupA, groupB := string(a.MetricGroup), string(b.MetricGroup); groupA != groupB {
		return groupA < groupB
	}
	tagsA, tagsB := a.Tags, b.Tags
	for i := range tagsA {
		if i >= len(tagsB) {
			// All the tags of b match the leading tags of a and a has extra tags,
			// so a is considered bigger than b.
			return false
		}
		tagA, tagB := &tagsA[i], &tagsB[i]
		if keyA, keyB := string(tagA.Key), string(tagB.Key); keyA != keyB {
			return keyA < keyB
		}
		if valueA, valueB := string(tagA.Value), string(tagB.Value); valueA != valueB {
			return valueA < valueB
		}
	}
	return len(tagsA) < len(tagsB)
}
func removeNaNs(tss []*timeseries) []*timeseries {
rvs := tss[:0]
for _, ts := range tss {

View File

@@ -1,6 +1,7 @@
package promql
import (
"math"
"testing"
"time"
@@ -132,6 +133,72 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_and(0xB3, 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_and(0xB3, 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{17, 17, 17, 17, 17, 17},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_and(time(), 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_and(time(), 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 16, 16, 0, 0, 16},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_or(0xA2, 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_or(0xA2, 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{179, 179, 179, 179, 179, 179},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_or(time(), 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_or(time(), 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1017, 1201, 1401, 1617, 1817, 2001},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_xor(0xB3, 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_xor(0xB3, 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{162, 162, 162, 162, 162, 162},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("bitmap_xor(time(), 0x11)", func(t *testing.T) {
t.Parallel()
q := `bitmap_xor(time(), 0x11)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1017, 1185, 1385, 1617, 1817, 1985},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("timezone_offset(UTC)", func(t *testing.T) {
t.Parallel()
q := `timezone_offset("UTC")`
@@ -972,6 +1039,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("sinh()", func(t *testing.T) {
t.Parallel()
q := `sinh(pi()*(2000-time())/1000)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{11.548739357257748, 6.132140673514712, 3.217113080357038, 1.6144880404748523, 0.6704839982471175, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("asin()", func(t *testing.T) {
t.Parallel()
q := `asin((2000-time())/1000)`
@@ -983,6 +1061,50 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("asinh(sinh)", func(t *testing.T) {
t.Parallel()
q := `asinh(sinh((2000-time())/1000))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 0.8000000000000002, 0.6, 0.4000000000000001, 0.2, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("atan2()", func(t *testing.T) {
t.Parallel()
q := `time() atan2 time()/10`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.07853981633974483, 0.07853981633974483, 0.07853981633974483, 0.07853981633974483, 0.07853981633974483, 0.07853981633974483},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("atan()", func(t *testing.T) {
t.Parallel()
q := `atan((2000-time())/1000)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.7853981633974483, 0.6747409422235526, 0.5404195002705842, 0.3805063771123649, 0.19739555984988078, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("atanh(tanh)", func(t *testing.T) {
t.Parallel()
q := `atanh(tanh((2000-time())/1000))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 0.8000000000000002, 0.6, 0.4000000000000001, 0.2, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("cos()", func(t *testing.T) {
t.Parallel()
q := `cos(pi()*(2000-time())/1000)`
@@ -1005,6 +1127,28 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("acosh(cosh)", func(t *testing.T) {
t.Parallel()
q := `acosh(cosh((2000-time())/1000))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 0.8000000000000002, 0.5999999999999999, 0.40000000000000036, 0.20000000000000023, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("rad(deg)", func(t *testing.T) {
t.Parallel()
q := `rad(deg(time()/500))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2.3999999999999995, 2.8, 3.2, 3.6, 4},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("floor(time()/500)", func(t *testing.T) {
t.Parallel()
q := `floor(time()/500)`
@@ -1868,6 +2012,62 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`label_graphite_group()`, func(t *testing.T) {
t.Parallel()
q := `sort(label_graphite_group((
alias(1, "foo.bar.baz"),
alias(2, "abc"),
label_set(alias(3, "a.xx.zz.asd"), "qwe", "rty"),
), 1, 3))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("bar.")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte(".")
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{3, 3, 3, 3, 3, 3},
Timestamps: timestampsExpected,
}
r3.MetricName.MetricGroup = []byte("xx.asd")
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("qwe"),
Value: []byte("rty"),
}}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`sum(label_graphite_group)`, func(t *testing.T) {
t.Parallel()
q := `sort(sum by (__name__) (
label_graphite_group((
alias(1, "foo.bar.baz"),
alias(2, "x.y.z"),
alias(3, "qe.bar.qqq"),
), 1)
))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("y")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{4, 4, 4, 4, 4, 4},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("bar")
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`two_timeseries`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(time() or label_set(2, "xx", "foo"))`
@@ -1888,9 +2088,9 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sign(time()-1400)`, func(t *testing.T) {
t.Run(`sgn(time()-1400)`, func(t *testing.T) {
t.Parallel()
q := `sign(time()-1400)`
q := `sgn(time()-1400)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{-1, -1, 0, 1, 1, 1},
@@ -2103,6 +2303,28 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`nan!=bool scalar`, func(t *testing.T) {
t.Parallel()
q := `(time() > 1234) !=bool 1400`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, 0, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`scalar!=bool nan`, func(t *testing.T) {
t.Parallel()
q := `1400 !=bool (time() > 1234)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, 0, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`scalar > time()`, func(t *testing.T) {
t.Parallel()
q := `123 > time()`
@@ -3362,6 +3584,46 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantiles()`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(histogram_quantiles("phi", 0.2, 0.3,
label_set(0, "foo", "bar", "le", "10")
or label_set(100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
), "phi")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{22, 22, 22, 22, 22, 22},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("phi"),
Value: []byte("0.2"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{28, 28, 28, 28, 28, 28},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("phi"),
Value: []byte("0.3"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`histogram_share(normal-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(35,
@@ -4509,6 +4771,67 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`quantile_over_time`, func(t *testing.T) {
t.Parallel()
q := `quantile_over_time(0.9, label_set(round(rand(0), 0.01), "__name__", "foo", "xx", "yy")[200s:5s])`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.893, 0.892, 0.9510000000000001, 0.8730000000000001, 0.9250000000000002, 0.891},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("foo")
r.MetricName.Tags = []storage.Tag{
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`quantiles_over_time`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(
quantiles_over_time("phi", 0.5, 0.9,
label_set(round(rand(0), 0.01), "__name__", "foo", "xx", "yy")[200s:5s]
),
"phi",
)`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.46499999999999997, 0.57, 0.485, 0.54, 0.555, 0.515},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("foo")
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("phi"),
Value: []byte("0.5"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.893, 0.892, 0.9510000000000001, 0.8730000000000001, 0.9250000000000002, 0.891},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("foo")
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("phi"),
Value: []byte("0.9"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`histogram_over_time`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]), "vmrange")`
@@ -4707,6 +5030,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`duration_over_time`, func(t *testing.T) {
t.Parallel()
q := `duration_over_time((time()<1200)[600s:10s], 20s)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{590, 580, 380, 180, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`share_gt_over_time`, func(t *testing.T) {
t.Parallel()
q := `share_gt_over_time(rand(0)[200s:10s], 0.7)`
@@ -4803,7 +5137,22 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`limitk(1)`, func(t *testing.T) {
t.Parallel()
q := `limitk(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
q := `limitk(1, label_set(10, "foo", "bar") or label_set(time()/150, "xbaz", "sss"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`limit_offset()`, func(t *testing.T) {
t.Parallel()
q := `limit_offset(1, 0, (label_set(10, "foo", "bar"), label_set(time()/150, "xbaz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
@@ -4931,7 +5280,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort(topk_min(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, nan, nan, nan},
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -4946,7 +5295,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort(bottomk_min(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -4961,7 +5310,7 @@ func TestExecSuccess(t *testing.T) {
q := `topk_max(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -4976,7 +5325,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort_desc(topk_max(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"), "remaining_sum=foo"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5002,7 +5351,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort_desc(topk_max(2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"), "remaining_sum"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5028,7 +5377,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort_desc(topk_max(3, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"), "remaining_sum"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5054,7 +5403,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort(bottomk_max(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, nan, nan, nan},
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5069,7 +5418,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort(topk_avg(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5099,7 +5448,22 @@ func TestExecSuccess(t *testing.T) {
q := `sort(topk_median(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("baz"),
Value: []byte("sss"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`topk_last(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(topk_last(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5111,10 +5475,25 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`bottomk_median(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, nan, nan, nan},
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`bottomk_last(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(bottomk_last(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -5292,7 +5671,7 @@ func TestExecSuccess(t *testing.T) {
q := `distinct_over_time((time() < 1700)[500s])`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{3, 3, 3, 3, nan, nan},
Values: []float64{3, 3, 3, 3, 2, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r1}
@@ -5303,7 +5682,7 @@ func TestExecSuccess(t *testing.T) {
q := `distinct_over_time((time() < 1700)[2.5i])`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{3, 3, 3, 3, nan, nan},
Values: []float64{3, 3, 3, 3, 2, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r1}
@@ -5365,9 +5744,10 @@ func TestExecSuccess(t *testing.T) {
t.Run(`quantile(-2)`, func(t *testing.T) {
t.Parallel()
q := `quantile(-2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
inf := math.Inf(-1)
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10, 10, 10},
Values: []float64{inf, inf, inf, inf, inf, inf},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -5378,7 +5758,7 @@ func TestExecSuccess(t *testing.T) {
q := `quantile(0.2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10, 10, 10},
Values: []float64{7.333333333333334, 8.4, 9.466666666666669, 10.133333333333333, 10.4, 10.666666666666668},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -5389,18 +5769,42 @@ func TestExecSuccess(t *testing.T) {
q := `quantile(0.5, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`quantiles("phi", 0.2, 0.5)`, func(t *testing.T) {
t.Parallel()
q := `sort(quantiles("phi", 0.2, 0.5, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{7.333333333333334, 8.4, 9.466666666666669, 10.133333333333333, 10.4, 10.666666666666668},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("phi"),
Value: []byte("0.2"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("phi"),
Value: []byte("0.5"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`median()`, func(t *testing.T) {
t.Parallel()
q := `median(label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -5420,9 +5824,10 @@ func TestExecSuccess(t *testing.T) {
t.Run(`quantile(3)`, func(t *testing.T) {
t.Parallel()
q := `quantile(3, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
inf := math.Inf(+1)
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
Values: []float64{inf, inf, inf, inf, inf, inf},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -5434,6 +5839,47 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`mad()`, func(t *testing.T) {
t.Parallel()
q := `mad(
alias(time(), "metric1"),
alias(time()*1.5, "metric2"),
label_set(time()*0.9, "baz", "sss"),
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{100, 120, 140, 160, 180, 200},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`outliers_mad(1)`, func(t *testing.T) {
t.Parallel()
q := `outliers_mad(1, (
alias(time(), "metric1"),
alias(time()*1.5, "metric2"),
label_set(time()*0.9, "baz", "sss"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1500, 1800, 2100, 2400, 2700, 3000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("metric2")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`outliers_mad(5)`, func(t *testing.T) {
t.Parallel()
q := `outliers_mad(5, (
alias(time(), "metric1"),
alias(time()*1.5, "metric2"),
label_set(time()*0.9, "baz", "sss"),
))`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`outliersk(0)`, func(t *testing.T) {
t.Parallel()
q := `outliersk(0, (
@@ -5493,7 +5939,8 @@ func TestExecSuccess(t *testing.T) {
q := `range_quantile(0.5, time())`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1600, 1600, 1600, 1600, 1600, 1600},
// time() results in [1000 1200 1400 1600 1800 2000]
Values: []float64{1500, 1500, 1500, 1500, 1500, 1500},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -5504,7 +5951,8 @@ func TestExecSuccess(t *testing.T) {
q := `range_median(time())`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1600, 1600, 1600, 1600, 1600, 1600},
// time() results in [1000 1200 1400 1600 1800 2000]
Values: []float64{1500, 1500, 1500, 1500, 1500, 1500},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -6152,12 +6600,13 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`rollup_candlestick()`, func(t *testing.T) {
t.Parallel()
q := `sort(rollup_candlestick(round(rand(0),0.01)[:10s]))`
q := `sort(rollup_candlestick(alias(round(rand(0),0.01),"foobar")[:10s]))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.02, 0.02, 0.03, 0, 0.03, 0.02},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("foobar")
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("low"),
@@ -6167,6 +6616,7 @@ func TestExecSuccess(t *testing.T) {
Values: []float64{0.9, 0.32, 0.82, 0.13, 0.28, 0.86},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("foobar")
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("open"),
@@ -6176,6 +6626,7 @@ func TestExecSuccess(t *testing.T) {
Values: []float64{0.1, 0.04, 0.49, 0.46, 0.57, 0.92},
Timestamps: timestampsExpected,
}
r3.MetricName.MetricGroup = []byte("foobar")
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("close"),
@@ -6185,6 +6636,7 @@ func TestExecSuccess(t *testing.T) {
Values: []float64{0.9, 0.94, 0.97, 0.93, 0.98, 0.92},
Timestamps: timestampsExpected,
}
r4.MetricName.MetricGroup = []byte("foobar")
r4.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("high"),
@@ -6225,6 +6677,39 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`rollup_scrape_interval()`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(rollup_scrape_interval(1[5m:10s]), "rollup")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("avg"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("max"),
}}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("min"),
}}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`rollup()`, func(t *testing.T) {
t.Parallel()
q := `sort(rollup(time()[:50s]))`
@@ -6568,6 +7053,37 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3, r4}
f(q, resultExpected)
})
t.Run(`count_values_big_numbers`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(
count_values("xxx", (alias(772424014, "first"), alias(772424230, "second"))),
"xxx"
)`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("772424014"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("772424230"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`count_values`, func(t *testing.T) {
t.Parallel()
q := `count_values("xxx", label_set(10, "foo", "bar") or label_set(time()/100, "foo", "bar", "baz", "xx"))`
@@ -6687,7 +7203,8 @@ func TestExecSuccess(t *testing.T) {
Value: []byte("10"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3, r4}
// expected sorted output for strings 1, 10, 2, 3
resultExpected := []netstorage.Result{r1, r4, r2, r3}
f(q, resultExpected)
})
t.Run(`count_values without (baz)`, func(t *testing.T) {
@@ -6741,6 +7258,44 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`result sorting`, func(t *testing.T) {
t.Parallel()
q := `label_set(1, "instance", "localhost:1001", "type", "free")
or label_set(1, "instance", "localhost:1001", "type", "buffers")
or label_set(1, "instance", "localhost:1000", "type", "buffers")
or label_set(1, "instance", "localhost:1000", "type", "free")
`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
testAddLabels(t, &r1.MetricName,
"instance", "localhost:1000", "type", "buffers")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
testAddLabels(t, &r2.MetricName,
"instance", "localhost:1000", "type", "free")
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
testAddLabels(t, &r3.MetricName,
"instance", "localhost:1001", "type", "buffers")
r4 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
testAddLabels(t, &r4.MetricName,
"instance", "localhost:1001", "type", "free")
resultExpected := []netstorage.Result{r1, r2, r3, r4}
f(q, resultExpected)
})
}
func TestExecError(t *testing.T) {
@@ -6803,9 +7358,10 @@ func TestExecError(t *testing.T) {
f(`label_keep()`)
f(`label_match()`)
f(`label_mismatch()`)
f(`label_graphite_group()`)
f(`round()`)
f(`round(1,2,3)`)
f(`sign()`)
f(`sgn()`)
f(`scalar()`)
f(`sort(1,2)`)
f(`sort_desc()`)
@@ -6814,6 +7370,7 @@ func TestExecError(t *testing.T) {
f(`timestamp()`)
f(`vector()`)
f(`histogram_quantile()`)
f(`histogram_quantiles()`)
f(`sum()`)
f(`count_values()`)
f(`quantile()`)
@@ -6824,12 +7381,14 @@ func TestExecError(t *testing.T) {
f(`topk_max()`)
f(`topk_avg()`)
f(`topk_median()`)
f(`topk_last()`)
f(`limitk()`)
f(`bottomk()`)
f(`bottomk_min()`)
f(`bottomk_max()`)
f(`bottomk_avg()`)
f(`bottomk_median()`)
f(`bottomk_last()`)
f(`time(123)`)
f(`start(1)`)
f(`end(1)`)
@@ -6842,9 +7401,13 @@ func TestExecError(t *testing.T) {
f(`smooth_exponential(1)`)
f(`remove_resets()`)
f(`sin()`)
f(`sinh()`)
f(`cos()`)
f(`cosh()`)
f(`asin()`)
f(`asinh()`)
f(`acos()`)
f(`acosh()`)
f(`rand(123, 456)`)
f(`rand_normal(123, 456)`)
f(`rand_exponential(122, 456)`)
@@ -6875,6 +7438,9 @@ func TestExecError(t *testing.T) {
f(`hoeffding_bound_upper()`)
f(`hoeffding_bound_upper(1)`)
f(`hoeffding_bound_upper(0.99, foo, 1)`)
f(`mad()`)
f(`outliers_mad()`)
f(`outliers_mad(1)`)
f(`outliersk()`)
f(`outliersk(1)`)
f(`mode_over_time()`)
@@ -6885,12 +7451,19 @@ func TestExecError(t *testing.T) {
f(`prometheus_buckets()`)
f(`buckets_limit()`)
f(`buckets_limit(1)`)
f(`duration_over_time()`)
f(`share_le_over_time()`)
f(`share_gt_over_time()`)
f(`count_le_over_time()`)
f(`count_gt_over_time()`)
f(`count_eq_over_time()`)
f(`count_ne_over_time()`)
f(`timezone_offset()`)
f(`bitmap_and()`)
f(`bitmap_or()`)
f(`bitmap_xor()`)
f(`quantiles()`)
f(`limit_offset()`)
// Invalid argument type
f(`median_over_time({}, 2)`)
@@ -6903,6 +7476,8 @@ func TestExecError(t *testing.T) {
f(`topk(label_set(2, "xx", "foo") or 1, 12)`)
f(`topk_avg(label_set(2, "xx", "foo") or 1, 12)`)
f(`limitk(label_set(2, "xx", "foo") or 1, 12)`)
f(`limit_offet((alias(1,"foo"),alias(2,"bar")), 2, 10)`)
f(`limit_offet(1, (alias(1,"foo"),alias(2,"bar")), 10)`)
f(`round(1, 1 or label_set(2, "xx", "foo"))`)
f(`histogram_quantile(1 or label_set(2, "xx", "foo"), 1)`)
f(`label_set(1, 2, 3)`)
@@ -7009,3 +7584,16 @@ func testMetricNamesEqual(t *testing.T, mn, mnExpected *storage.MetricName, pos
}
}
}
// testAddLabels appends the given labels to mn.Tags.
//
// labels must be a flat list of alternating keys and values:
// key1, value1, key2, value2, ...
// The test is failed immediately if an odd number of strings is passed.
func testAddLabels(t *testing.T, mn *storage.MetricName, labels ...string) {
	t.Helper()
	if len(labels)%2 != 0 {
		t.Fatalf("uneven number of labels passed: %v", labels)
	}
	// Consume the list one (key, value) pair at a time.
	for rest := labels; len(rest) > 0; rest = rest[2:] {
		tag := storage.Tag{
			Key:   []byte(rest[0]),
			Value: []byte(rest[1]),
		}
		mn.Tags = append(mn.Tags, tag)
	}
}

View File

@@ -12,7 +12,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
"github.com/valyala/histogram"
)
var minStalenessInterval = flag.Duration("search.minStalenessInterval", 0, "The minimum interval for staleness calculations. "+
@@ -43,46 +42,49 @@ var rollupFuncs = map[string]newRollupFunc{
"stdvar_over_time": newRollupFuncOneArg(rollupStdvar),
"absent_over_time": newRollupFuncOneArg(rollupAbsent),
"present_over_time": newRollupFuncOneArg(rollupPresent),
"last_over_time": newRollupFuncOneArg(rollupLast),
// Additional rollup funcs.
"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
"range_over_time": newRollupFuncOneArg(rollupRange),
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
"first_over_time": newRollupFuncOneArg(rollupFirst),
"last_over_time": newRollupFuncOneArg(rollupLast),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"increase_pure": newRollupFuncOneArg(rollupIncreasePure), // + rollupFuncsRemoveCounterResets
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"lag": newRollupFuncOneArg(rollupLag),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"tmin_over_time": newRollupFuncOneArg(rollupTmin),
"tmax_over_time": newRollupFuncOneArg(rollupTmax),
"tfirst_over_time": newRollupFuncOneArg(rollupTfirst),
"tlast_over_time": newRollupFuncOneArg(rollupTlast),
"share_le_over_time": newRollupShareLE,
"share_gt_over_time": newRollupShareGT,
"count_le_over_time": newRollupCountLE,
"count_gt_over_time": newRollupCountGT,
"count_eq_over_time": newRollupCountEQ,
"count_ne_over_time": newRollupCountNE,
"histogram_over_time": newRollupFuncOneArg(rollupHistogram),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
"ascent_over_time": newRollupFuncOneArg(rollupAscentOverTime),
"descent_over_time": newRollupFuncOneArg(rollupDescentOverTime),
"zscore_over_time": newRollupFuncOneArg(rollupZScoreOverTime),
"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
"range_over_time": newRollupFuncOneArg(rollupRange),
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
"first_over_time": newRollupFuncOneArg(rollupFirst),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"increase_pure": newRollupFuncOneArg(rollupIncreasePure), // + rollupFuncsRemoveCounterResets
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"lag": newRollupFuncOneArg(rollupLag),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"tmin_over_time": newRollupFuncOneArg(rollupTmin),
"tmax_over_time": newRollupFuncOneArg(rollupTmax),
"tfirst_over_time": newRollupFuncOneArg(rollupTfirst),
"tlast_over_time": newRollupFuncOneArg(rollupTlast),
"duration_over_time": newRollupDurationOverTime,
"share_le_over_time": newRollupShareLE,
"share_gt_over_time": newRollupShareGT,
"count_le_over_time": newRollupCountLE,
"count_gt_over_time": newRollupCountGT,
"count_eq_over_time": newRollupCountEQ,
"count_ne_over_time": newRollupCountNE,
"histogram_over_time": newRollupFuncOneArg(rollupHistogram),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
"rollup_scrape_interval": newRollupFuncOneArg(rollupFake),
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
"ascent_over_time": newRollupFuncOneArg(rollupAscentOverTime),
"descent_over_time": newRollupFuncOneArg(rollupDescentOverTime),
"zscore_over_time": newRollupFuncOneArg(rollupZScoreOverTime),
"quantiles_over_time": newRollupQuantiles,
// `timestamp` function must return timestamp for the last datapoint on the current window
// in order to properly handle offset and timestamps unaligned to the current step.
@@ -144,32 +146,26 @@ var rollupAggrFuncs = map[string]rollupFunc{
"rate_over_sum": rollupRateOverSum,
}
var rollupFuncsCannotAdjustWindow = map[string]bool{
"changes": true,
"delta": true,
"holt_winters": true,
"idelta": true,
"increase": true,
"predict_linear": true,
"resets": true,
"avg_over_time": true,
"sum_over_time": true,
"count_over_time": true,
"quantile_over_time": true,
"stddev_over_time": true,
"stdvar_over_time": true,
"absent_over_time": true,
"present_over_time": true,
"sum2_over_time": true,
"geomean_over_time": true,
"distinct_over_time": true,
"increases_over_time": true,
"decreases_over_time": true,
"increase_pure": true,
"integrate": true,
"ascent_over_time": true,
"descent_over_time": true,
"zscore_over_time": true,
// VictoriaMetrics can increase lookbehind window in square brackets for these functions
// if the given window doesn't contain enough samples for calculations.
//
// This is needed in order to return the expected non-empty graphs when zooming in the graph in Grafana,
// which is built with `func_name(metric[$__interval])` query.
var rollupFuncsCanAdjustWindow = map[string]bool{
"default_rollup": true,
"deriv": true,
"deriv_fast": true,
"ideriv": true,
"irate": true,
"rate": true,
"rate_over_sum": true,
"rollup": true,
"rollup_candlestick": true,
"rollup_deriv": true,
"rollup_rate": true,
"rollup_scrape_interval": true,
"scrape_interval": true,
"timestamp": true,
}
var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -189,6 +185,7 @@ var rollupFuncsKeepMetricGroup = map[string]bool{
"min_over_time": true,
"max_over_time": true,
"quantile_over_time": true,
"quantiles_over_time": true,
"rollup": true,
"geomean_over_time": true,
"hoeffding_bound_lower": true,
@@ -196,6 +193,7 @@ var rollupFuncsKeepMetricGroup = map[string]bool{
"first_over_time": true,
"last_over_time": true,
"mode_over_time": true,
"rollup_candlestick": true,
}
func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
@@ -247,15 +245,17 @@ func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
return aggrFuncNames, nil
}
func getRollupArgIdx(funcName string) int {
funcName = strings.ToLower(funcName)
func getRollupArgIdx(fe *metricsql.FuncExpr) int {
funcName := strings.ToLower(fe.Name)
if rollupFuncs[funcName] == nil {
logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", funcName)
logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", fe.Name)
}
switch funcName {
case "quantile_over_time", "aggr_over_time",
"hoeffding_bound_lower", "hoeffding_bound_upper":
return 1
case "quantiles_over_time":
return len(fe.Args) - 1
default:
return 0
}
@@ -277,7 +277,7 @@ func getRollupConfigs(name string, rf rollupFunc, expr metricsql.Expr, start, en
End: end,
Step: step,
Window: window,
MayAdjustWindow: !rollupFuncsCannotAdjustWindow[name],
MayAdjustWindow: rollupFuncsCanAdjustWindow[name],
LookbackDelta: lookbackDelta,
Timestamps: sharedTimestamps,
isDefaultRollup: name == "default_rollup",
@@ -312,6 +312,24 @@ func getRollupConfigs(name string, rf rollupFunc, expr metricsql.Expr, start, en
rcs = append(rcs, newRollupConfig(rollupClose, "close"))
rcs = append(rcs, newRollupConfig(rollupLow, "low"))
rcs = append(rcs, newRollupConfig(rollupHigh, "high"))
case "rollup_scrape_interval":
preFuncPrev := preFunc
preFunc = func(values []float64, timestamps []int64) {
preFuncPrev(values, timestamps)
// Calculate intervals in seconds between samples.
tsSecsPrev := nan
for i, ts := range timestamps {
tsSecs := float64(ts) / 1000
values[i] = tsSecs - tsSecsPrev
tsSecsPrev = tsSecs
}
if len(values) > 1 {
// Overwrite the first NaN interval with the second interval,
// So min, max and avg rollups could be calculated properly, since they don't expect to receive NaNs.
values[0] = values[1]
}
}
rcs = appendRollupConfigs(rcs)
case "aggr_over_time":
aggrFuncNames, err := getRollupAggrFuncNames(expr)
if err != nil {
@@ -420,14 +438,16 @@ var (
const maxSilenceInterval = 5 * 60 * 1000
type timeseriesMap struct {
origin *timeseries
labelName string
h metrics.Histogram
m map[string]*timeseries
origin *timeseries
h metrics.Histogram
m map[string]*timeseries
}
func newTimeseriesMap(funcName string, sharedTimestamps []int64, mnSrc *storage.MetricName) *timeseriesMap {
if strings.ToLower(funcName) != "histogram_over_time" {
funcName = strings.ToLower(funcName)
switch funcName {
case "histogram_over_time", "quantiles_over_time":
default:
return nil
}
@@ -437,13 +457,14 @@ func newTimeseriesMap(funcName string, sharedTimestamps []int64, mnSrc *storage.
}
var origin timeseries
origin.MetricName.CopyFrom(mnSrc)
origin.MetricName.ResetMetricGroup()
if !rollupFuncsKeepMetricGroup[funcName] {
origin.MetricName.ResetMetricGroup()
}
origin.Timestamps = sharedTimestamps
origin.Values = values
return &timeseriesMap{
origin: &origin,
labelName: "vmrange",
m: make(map[string]*timeseries),
origin: &origin,
m: make(map[string]*timeseries),
}
}
@@ -454,15 +475,15 @@ func (tsm *timeseriesMap) AppendTimeseriesTo(dst []*timeseries) []*timeseries {
return dst
}
func (tsm *timeseriesMap) GetOrCreateTimeseries(labelValue string) *timeseries {
func (tsm *timeseriesMap) GetOrCreateTimeseries(labelName, labelValue string) *timeseries {
ts := tsm.m[labelValue]
if ts != nil {
return ts
}
ts = &timeseries{}
ts.CopyFromShallowTimestamps(tsm.origin)
ts.MetricName.RemoveTag(tsm.labelName)
ts.MetricName.AddTag(tsm.labelName, labelValue)
ts.MetricName.RemoveTag(labelName)
ts.MetricName.AddTag(labelName, labelValue)
tsm.m[labelValue] = ts
return ts
}
@@ -632,18 +653,20 @@ func getScrapeInterval(timestamps []int64) int64 {
}
// Estimate scrape interval as 0.6 quantile for the first 20 intervals.
h := histogram.GetFast()
tsPrev := timestamps[0]
timestamps = timestamps[1:]
if len(timestamps) > 20 {
timestamps = timestamps[:20]
}
a := getFloat64s()
intervals := a.A[:0]
for _, ts := range timestamps {
h.Update(float64(ts - tsPrev))
intervals = append(intervals, float64(ts-tsPrev))
tsPrev = ts
}
scrapeInterval := int64(h.Quantile(0.6))
histogram.PutFast(h)
scrapeInterval := int64(quantile(0.6, intervals))
a.A = intervals
putFloat64s(a)
if scrapeInterval <= 0 {
return int64(maxSilenceInterval)
}
@@ -831,40 +854,63 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) {
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return rfa.prevValue, 0
n := float64(len(values))
if n == 0 {
return nan, nan
}
if n == 1 {
return values[0], 0
}
// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
tFirst := rfa.prevTimestamp
vSum := rfa.prevValue
interceptTime := rfa.currTimestamp
vSum := float64(0)
tSum := float64(0)
tvSum := float64(0)
ttSum := float64(0)
n := 1.0
if math.IsNaN(rfa.prevValue) {
tFirst = timestamps[0]
vSum = 0
n = 0
}
for i, v := range values {
dt := float64(timestamps[i]-tFirst) / 1e3
dt := float64(timestamps[i]-interceptTime) / 1e3
vSum += v
tSum += dt
tvSum += dt * v
ttSum += dt * dt
}
n += float64(len(values))
if n == 1 {
return vSum, 0
}
k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum)
v := (vSum - k*tSum) / n
// Adjust v to the last timestamp on the given time range.
v += k * (float64(timestamps[len(timestamps)-1]-tFirst) / 1e3)
k := (tvSum - tSum*vSum/n) / (ttSum - tSum*tSum/n)
v := vSum/n - k*tSum/n
return v, k
}
// newRollupDurationOverTime returns a rollup func, which sums the intervals
// between adjacent samples on the rollup window, skipping the intervals exceeding max_interval.
//
// args must contain exactly two items; args[1] is the max_interval scalar in seconds.
// An error is returned if the number of args is unexpected or if args[1] isn't a scalar.
//
// The returned rollup func returns the summary duration in seconds or nan if the window has no samples.
func newRollupDurationOverTime(args []interface{}) (rollupFunc, error) {
	if err := expectRollupArgsNum(args, 2); err != nil {
		return nil, err
	}
	dMaxs, err := getScalar(args[1], 1)
	if err != nil {
		return nil, err
	}
	rf := func(rfa *rollupFuncArg) float64 {
		// There is no need in handling NaNs here, since they must be cleaned up
		// before calling rollup funcs.
		values := rfa.values
		if len(values) == 0 {
			return nan
		}
		timestamps := rfa.timestamps
		tPrev := timestamps[0]
		dSum := int64(0)
		// Timestamps are in milliseconds, while max_interval is in seconds.
		dMax := int64(dMaxs[rfa.idx] * 1000)
		for _, t := range timestamps {
			d := t - tPrev
			if d <= dMax {
				// The gap between adjacent samples is small enough,
				// so count it as the time when the series existed.
				dSum += d
			}
			tPrev = t
		}
		// Convert to float64 before dividing, so the sub-second part of the duration
		// isn't dropped by integer division.
		return float64(dSum) / 1000
	}
	return rf, nil
}
// newRollupShareLE returns the rollup func built by newRollupShareFilter
// for the given args and the countFilterLE filter.
func newRollupShareLE(args []interface{}) (rollupFunc, error) {
return newRollupShareFilter(args, countFilterLE)
}
@@ -1021,14 +1067,29 @@ func rollupHoeffdingBoundInternal(rfa *rollupFuncArg, phis []float64) (float64,
return bound, vAvg
}
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
func newRollupQuantiles(args []interface{}) (rollupFunc, error) {
if len(args) < 3 {
return nil, fmt.Errorf("unexpected number of args: %d; want at least 3 args", len(args))
}
phis, err := getScalar(args[0], 0)
tssPhi, ok := args[0].([]*timeseries)
if !ok {
return nil, fmt.Errorf("unexpected type for phi arg: %T; want string", args[0])
}
phiLabel, err := getString(tssPhi, 0)
if err != nil {
return nil, err
}
phiArgs := args[1 : len(args)-1]
phis := make([]float64, len(phiArgs))
phiStrs := make([]string, len(phiArgs))
for i, phiArg := range phiArgs {
phiValues, err := getScalar(phiArg, i+1)
if err != nil {
return nil, fmt.Errorf("cannot obtain phi from arg #%d: %w", i+1, err)
}
phis[i] = phiValues[0]
phiStrs[i] = fmt.Sprintf("%g", phiValues[0])
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
@@ -1040,13 +1101,34 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
// Fast path - only a single value.
return values[0]
}
hf := histogram.GetFast()
for _, v := range values {
hf.Update(v)
qs := getFloat64s()
qs.A = quantiles(qs.A[:0], phis, values)
idx := rfa.idx
tsm := rfa.tsm
for i, phiStr := range phiStrs {
ts := tsm.GetOrCreateTimeseries(phiLabel, phiStr)
ts.Values[idx] = qs.A[i]
}
putFloat64s(qs)
return nan
}
return rf, nil
}
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
phis, err := getScalar(args[0], 0)
if err != nil {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
phi := phis[rfa.idx]
qv := hf.Quantile(phi)
histogram.PutFast(hf)
qv := quantile(phi, values)
return qv
}
return rf, nil
@@ -1061,7 +1143,7 @@ func rollupHistogram(rfa *rollupFuncArg) float64 {
}
idx := rfa.idx
tsm.h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
ts := tsm.GetOrCreateTimeseries(vmrange)
ts := tsm.GetOrCreateTimeseries("vmrange", vmrange)
ts.Values[idx] = float64(count)
})
return nan
@@ -1285,10 +1367,7 @@ func rollupCount(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
return nan
}
return float64(len(values))
}
@@ -1305,14 +1384,11 @@ func rollupStdvar(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
return nan
}
if len(values) == 1 {
// Fast path.
return values[0]
return 0
}
var avg float64
var count float64
@@ -1340,7 +1416,7 @@ func rollupIncreasePure(rfa *rollupFuncArg) float64 {
prevValue = 0
}
if len(values) == 0 {
// Assume the counter didsn't change since prevValue.
// Assume the counter didn't change since prevValue.
return 0
}
return values[len(values)-1] - prevValue
@@ -1720,19 +1796,28 @@ func rollupModeOverTime(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
// Copy rfa.values to a.A, since modeNoNaNs modifies a.A contents.
a := float64sPool.Get().(*float64s)
a := getFloat64s()
a.A = append(a.A[:0], rfa.values...)
result := modeNoNaNs(rfa.prevValue, a.A)
float64sPool.Put(a)
putFloat64s(a)
return result
}
var float64sPool = &sync.Pool{
New: func() interface{} {
return &float64s{}
},
// getFloat64s returns a float64s obtained from float64sPool.
//
// A new float64s is allocated if the pool returns nil.
func getFloat64s() *float64s {
	if pooled := float64sPool.Get(); pooled != nil {
		return pooled.(*float64s)
	}
	return &float64s{}
}
// putFloat64s truncates a.A to zero length and puts a into float64sPool.
//
// The caller must not use a after the call.
func putFloat64s(a *float64s) {
// Reset the length, while keeping the underlying array for further re-use.
a.A = a.A[:0]
float64sPool.Put(a)
}
var float64sPool sync.Pool
type float64s struct {
A []float64
}
@@ -1897,6 +1982,18 @@ func getScalar(arg interface{}, argNum int) ([]float64, error) {
return ts[0].Values, nil
}
// getIntNumber returns the first value of the scalar arg converted to int.
//
// argNum is passed to getScalar as is.
// Zero is returned if the scalar contains no values.
// The error from getScalar is returned as is.
func getIntNumber(arg interface{}, argNum int) (int, error) {
	scalarValues, err := getScalar(arg, argNum)
	if err != nil {
		return 0, err
	}
	if len(scalarValues) == 0 {
		return 0, nil
	}
	return int(scalarValues[0]), nil
}
func getString(tss []*timeseries, argNum int) (string, error) {
if len(tss) != 1 {
return "", fmt.Errorf(`arg #%d must contain a single timeseries; got %d timeseries`, argNum+1, len(tss))

View File

@@ -23,11 +23,18 @@ import (
var (
cacheTimestampOffset = flag.Duration("search.cacheTimestampOffset", 5*time.Minute, "The maximum duration since the current time for response data, "+
"which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses "+
"due to time synchronization issues between VictoriaMetrics and data sources")
"due to time synchronization issues between VictoriaMetrics and data sources. See also -search.disableAutoCacheReset")
disableAutoCacheReset = flag.Bool("search.disableAutoCacheReset", false, "Whether to disable automatic response cache reset if a sample with timestamp "+
"outside -search.cacheTimestampOffset is inserted into VictoriaMetrics")
)
// ResetRollupResultCacheIfNeeded resets rollup result cache if mrs contains timestamps outside `now - search.cacheTimestampOffset`.
func ResetRollupResultCacheIfNeeded(mrs []storage.MetricRow) {
if *disableAutoCacheReset {
// Do not reset response cache if -search.disableAutoCacheReset is set.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1570 .
return
}
checkRollupResultCacheResetOnce.Do(func() {
rollupResultResetMetricRowSample.Store(&storage.MetricRow{})
go checkRollupResultCacheReset()

View File

@@ -335,14 +335,14 @@ func TestRollupQuantileOverTime(t *testing.T) {
testRollupFunc(t, "quantile_over_time", args, &me, vExpected)
}
f(-123, 12)
f(-0.5, 12)
f(-123, math.Inf(-1))
f(-0.5, math.Inf(-1))
f(0, 12)
f(0.1, 21)
f(0.1, 22.1)
f(0.5, 34)
f(0.9, 99)
f(0.9, 94.50000000000001)
f(1, 123)
f(234, 123)
f(234, math.Inf(+1))
}
func TestRollupPredictLinear(t *testing.T) {
@@ -357,10 +357,32 @@ func TestRollupPredictLinear(t *testing.T) {
testRollupFunc(t, "predict_linear", args, &me, vExpected)
}
f(0e-3, 30.382432471845043)
f(50e-3, 17.03950235614201)
f(100e-3, 3.696572240438975)
f(200e-3, -22.989287990967092)
f(0e-3, 65.07405077267295)
f(50e-3, 51.7311206569699)
f(100e-3, 38.38819054126685)
f(200e-3, 11.702330309860756)
}
// TestLinearRegression verifies the (v, k) pair returned by linearRegression
// for the given values and timestamps.
func TestLinearRegression(t *testing.T) {
// f runs linearRegression on the given samples and fails the test
// if the returned v or k doesn't match expV or expK according to compareValues.
f := func(values []float64, timestamps []int64, expV, expK float64) {
t.Helper()
rfa := &rollupFuncArg{
values: values,
timestamps: timestamps,
// currTimestamp is set 100 units after the first sample.
currTimestamp: timestamps[0] + 100,
}
v, k := linearRegression(rfa)
if err := compareValues([]float64{v}, []float64{expV}); err != nil {
t.Fatalf("unexpected v err: %s", err)
}
if err := compareValues([]float64{k}, []float64{expK}); err != nil {
t.Fatalf("unexpected k err: %s", err)
}
}
// NOTE(review): a single sample is expected to give NaN for both v and k here;
// confirm this matches the single-value branch of linearRegression.
f([]float64{1}, []int64{1}, math.NaN(), math.NaN())
f([]float64{1, 2}, []int64{100, 300}, 1.5, 5)
f([]float64{2, 4, 6, 8, 10}, []int64{100, 200, 300, 400, 500}, 4, 20)
}
func TestRollupHoltWinters(t *testing.T) {
@@ -448,7 +470,7 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
f("default_rollup", 34)
f("changes", 11)
f("delta", 34)
f("deriv", -266.85860231406065)
f("deriv", -266.85860231406093)
f("deriv_fast", -712)
f("idelta", 0)
f("increase", 398)
@@ -508,6 +530,7 @@ func TestRollupNewRollupFuncError(t *testing.T) {
f("holt_winters", nil)
f("predict_linear", nil)
f("quantile_over_time", nil)
f("quantiles_over_time", nil)
// Invalid arg type
scalarTs := []*timeseries{{
@@ -521,6 +544,7 @@ func TestRollupNewRollupFuncError(t *testing.T) {
f("predict_linear", []interface{}{123, 123})
f("predict_linear", []interface{}{me, 123})
f("quantile_over_time", []interface{}{123, 123})
f("quantiles_over_time", []interface{}{123, 123})
}
func TestRollupNoWindowNoPoints(t *testing.T) {
@@ -624,7 +648,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, 123, 34, nan}
valuesExpected := []float64{nan, nan, 123, 34, 32}
timestampsExpected := []int64{-50, 0, 50, 100, 150}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -731,7 +755,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 123, 54, 44, nan}
valuesExpected := []float64{nan, 123, 54, 44, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -745,7 +769,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 4, 3, nan}
valuesExpected := []float64{nan, 4, 4, 3, 1}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -759,7 +783,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 21, 12, 32, nan}
valuesExpected := []float64{nan, 21, 12, 32, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -773,7 +797,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 123, 99, 44, nan}
valuesExpected := []float64{nan, 123, 99, 44, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -787,7 +811,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 222, 199, 110, nan}
valuesExpected := []float64{nan, 222, 199, 110, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -801,7 +825,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, -9, 22, nan}
valuesExpected := []float64{nan, 21, -9, 22, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -829,7 +853,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.004, 0, 0, nan}
valuesExpected := []float64{nan, 0.004, 0, 0, 0.03}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -843,7 +867,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.044, 0.04, nan}
valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -857,7 +881,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.075, 0.115, nan}
valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -871,7 +895,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, nan}
valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -885,7 +909,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, nan}
valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -899,7 +923,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 4, 3, nan}
valuesExpected := []float64{nan, 4, 4, 3, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -927,7 +951,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 2, 2, 1, nan}
valuesExpected := []float64{nan, 2, 2, 1, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -941,7 +965,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, nan}
valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -955,7 +979,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{0, -2879.310344827587, 558.0608793686595, 422.84569138276544, nan}
valuesExpected := []float64{nan, -2879.310344827588, 127.87627310448904, -496.5831435079728, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -983,7 +1007,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, nan}
valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -997,7 +1021,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, nan}
valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1011,7 +1035,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 2.148, 1.593, 1.156, nan}
valuesExpected := []float64{nan, 2.148, 1.593, 1.156, 1.36}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1025,7 +1049,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 4, 3, nan}
valuesExpected := []float64{nan, 4, 4, 3, 1}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1039,7 +1063,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 7, 6, nan}
valuesExpected := []float64{nan, 4, 7, 6, 3}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1053,7 +1077,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 21, 34, 34, nan}
valuesExpected := []float64{nan, 21, 34, 34, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1067,7 +1091,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 2775, 5262.5, 3678.5714285714284, nan}
valuesExpected := []float64{nan, 2775, 5262.5, 3678.5714285714284, 2880}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -1103,7 +1127,7 @@ func TestRollupBigNumberOfValues(t *testing.T) {
srcTimestamps[i] = int64(i / 2)
}
values := rc.Do(nil, srcValues, srcTimestamps)
valuesExpected := []float64{1, 4001, 8001, nan, nan, nan}
valuesExpected := []float64{1, 4001, 8001, 9999, nan, nan}
timestampsExpected := []int64{0, 2000, 4000, 6000, 8000, 10000}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}
@@ -1136,8 +1160,18 @@ func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExp
}
continue
}
if math.Abs(v-vExpected) > 1e-15 {
t.Fatalf("unexpected value at values[%d]; got %f; want %f\nvalues=\n%v\nvaluesExpected=\n%v",
if math.IsNaN(vExpected) {
if !math.IsNaN(v) {
t.Fatalf("unexpected value at values[%d]; got %f; want nan\nvalues=\n%v\nvaluesExpected=\n%v",
i, v, values, valuesExpected)
}
continue
}
// Compare values with the reduced precision because of different precision errors
// on different OS/architectures. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1738
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1653
if math.Abs(v-vExpected)/math.Abs(vExpected) > 1e-13 {
t.Fatalf("unexpected value at values[%d]; got %v; want %v\nvalues=\n%v\nvaluesExpected=\n%v",
i, v, vExpected, values, valuesExpected)
}
}

View File

@@ -1,6 +1,7 @@
package promql
import (
"bytes"
"fmt"
"math"
"math/rand"
@@ -14,7 +15,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metricsql"
"github.com/valyala/histogram"
)
var transformFuncsKeepMetricGroup = map[string]bool{
@@ -44,13 +44,22 @@ var transformFuncs = map[string]transformFunc{
// See funcs accepting instant-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
"abs": newTransformFuncOneArg(transformAbs),
"absent": transformAbsent,
"acos": newTransformFuncOneArg(transformAcos),
"acosh": newTransformFuncOneArg(transformAcosh),
"asin": newTransformFuncOneArg(transformAsin),
"asinh": newTransformFuncOneArg(transformAsinh),
"atan": newTransformFuncOneArg(transformAtan),
"atanh": newTransformFuncOneArg(transformAtanh),
"ceil": newTransformFuncOneArg(transformCeil),
"clamp": transformClamp,
"clamp_max": transformClampMax,
"clamp_min": transformClampMin,
"cos": newTransformFuncOneArg(transformCos),
"cosh": newTransformFuncOneArg(transformCosh),
"day_of_month": newTransformFuncDateTime(transformDayOfMonth),
"day_of_week": newTransformFuncDateTime(transformDayOfWeek),
"days_in_month": newTransformFuncDateTime(transformDaysInMonth),
"deg": newTransformFuncOneArg(transformDeg),
"exp": newTransformFuncOneArg(transformExp),
"floor": newTransformFuncOneArg(transformFloor),
"histogram_quantile": transformHistogramQuantile,
@@ -62,68 +71,75 @@ var transformFuncs = map[string]transformFunc{
"log10": newTransformFuncOneArg(transformLog10),
"minute": newTransformFuncDateTime(transformMinute),
"month": newTransformFuncDateTime(transformMonth),
"pi": transformPi,
"rad": newTransformFuncOneArg(transformRad),
"round": transformRound,
"sign": transformSign,
"scalar": transformScalar,
"sgn": transformSgn,
"sin": newTransformFuncOneArg(transformSin),
"sinh": newTransformFuncOneArg(transformSinh),
"sort": newTransformFuncSort(false),
"sort_desc": newTransformFuncSort(true),
"sqrt": newTransformFuncOneArg(transformSqrt),
"tan": newTransformFuncOneArg(transformTan),
"tanh": newTransformFuncOneArg(transformTanh),
"time": transformTime,
// "timestamp" has been moved to rollup funcs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
"vector": transformVector,
"year": newTransformFuncDateTime(transformYear),
// New funcs
"label_set": transformLabelSet,
"label_map": transformLabelMap,
"label_uppercase": transformLabelUppercase,
"label_lowercase": transformLabelLowercase,
"label_del": transformLabelDel,
"label_keep": transformLabelKeep,
"label_copy": transformLabelCopy,
"label_move": transformLabelMove,
"label_transform": transformLabelTransform,
"label_value": transformLabelValue,
"label_match": transformLabelMatch,
"label_mismatch": transformLabelMismatch,
"union": transformUnion,
"": transformUnion, // empty func is a synonym to union
"keep_last_value": transformKeepLastValue,
"keep_next_value": transformKeepNextValue,
"interpolate": transformInterpolate,
"start": newTransformFuncZeroArgs(transformStart),
"end": newTransformFuncZeroArgs(transformEnd),
"step": newTransformFuncZeroArgs(transformStep),
"running_sum": newTransformFuncRunning(runningSum),
"running_max": newTransformFuncRunning(runningMax),
"running_min": newTransformFuncRunning(runningMin),
"running_avg": newTransformFuncRunning(runningAvg),
"range_sum": newTransformFuncRange(runningSum),
"range_max": newTransformFuncRange(runningMax),
"range_min": newTransformFuncRange(runningMin),
"range_avg": newTransformFuncRange(runningAvg),
"range_first": transformRangeFirst,
"range_last": transformRangeLast,
"range_quantile": transformRangeQuantile,
"smooth_exponential": transformSmoothExponential,
"remove_resets": transformRemoveResets,
"rand": newTransformRand(newRandFloat64),
"rand_normal": newTransformRand(newRandNormFloat64),
"rand_exponential": newTransformRand(newRandExpFloat64),
"pi": transformPi,
"sin": newTransformFuncOneArg(transformSin),
"cos": newTransformFuncOneArg(transformCos),
"asin": newTransformFuncOneArg(transformAsin),
"acos": newTransformFuncOneArg(transformAcos),
"prometheus_buckets": transformPrometheusBuckets,
"buckets_limit": transformBucketsLimit,
"histogram_share": transformHistogramShare,
"histogram_avg": transformHistogramAvg,
"histogram_stdvar": transformHistogramStdvar,
"histogram_stddev": transformHistogramStddev,
"sort_by_label": newTransformFuncSortByLabel(false),
"sort_by_label_desc": newTransformFuncSortByLabel(true),
"timezone_offset": transformTimezoneOffset,
"label_set": transformLabelSet,
"label_map": transformLabelMap,
"label_uppercase": transformLabelUppercase,
"label_lowercase": transformLabelLowercase,
"label_del": transformLabelDel,
"label_keep": transformLabelKeep,
"label_copy": transformLabelCopy,
"label_move": transformLabelMove,
"label_transform": transformLabelTransform,
"label_value": transformLabelValue,
"label_match": transformLabelMatch,
"label_mismatch": transformLabelMismatch,
"label_graphite_group": transformLabelGraphiteGroup,
"union": transformUnion,
"": transformUnion, // empty func is a synonym to union
"keep_last_value": transformKeepLastValue,
"keep_next_value": transformKeepNextValue,
"interpolate": transformInterpolate,
"start": newTransformFuncZeroArgs(transformStart),
"end": newTransformFuncZeroArgs(transformEnd),
"step": newTransformFuncZeroArgs(transformStep),
"running_sum": newTransformFuncRunning(runningSum),
"running_max": newTransformFuncRunning(runningMax),
"running_min": newTransformFuncRunning(runningMin),
"running_avg": newTransformFuncRunning(runningAvg),
"range_sum": newTransformFuncRange(runningSum),
"range_max": newTransformFuncRange(runningMax),
"range_min": newTransformFuncRange(runningMin),
"range_avg": newTransformFuncRange(runningAvg),
"range_first": transformRangeFirst,
"range_last": transformRangeLast,
"range_quantile": transformRangeQuantile,
"smooth_exponential": transformSmoothExponential,
"remove_resets": transformRemoveResets,
"rand": newTransformRand(newRandFloat64),
"rand_normal": newTransformRand(newRandNormFloat64),
"rand_exponential": newTransformRand(newRandExpFloat64),
"prometheus_buckets": transformPrometheusBuckets,
"buckets_limit": transformBucketsLimit,
"histogram_share": transformHistogramShare,
"histogram_avg": transformHistogramAvg,
"histogram_stdvar": transformHistogramStdvar,
"histogram_stddev": transformHistogramStddev,
"sort_by_label": newTransformFuncSortByLabel(false),
"sort_by_label_desc": newTransformFuncSortByLabel(true),
"timezone_offset": transformTimezoneOffset,
"bitmap_and": newTransformBitmap(bitmapAnd),
"bitmap_or": newTransformBitmap(bitmapOr),
"bitmap_xor": newTransformBitmap(bitmapXor),
"histogram_quantiles": transformHistogramQuantiles,
}
func getTransformFunc(s string) transformFunc {
@@ -343,7 +359,10 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
if err != nil {
return nil, err
}
limit := int(limits[0])
limit := 0
if len(limits) > 0 {
limit = int(limits[0])
}
if limit <= 0 {
return nil, nil
}
@@ -786,6 +805,43 @@ func stdvarForLeTimeseries(i int, xss []leTimeseries) float64 {
return stdvar
}
// transformHistogramQuantiles calculates multiple quantiles over the same set of histogram buckets.
//
// Args layout: args[0] is the destination label name, args[1:len(args)-1] are the phi values
// and the last arg holds the bucket time series.
//
// transformHistogramQuantile is invoked once per phi over a private copy of the buckets,
// and every resulting series gets dstLabel set to the textual form of phi,
// so results for distinct quantiles can be told apart.
//
// An error is returned if less than 3 args are passed, if dstLabel cannot be obtained,
// if a phi arg contains no series or if the calculation of any quantile fails.
// nil is returned if a phi arg contains no points.
func transformHistogramQuantiles(tfa *transformFuncArg) ([]*timeseries, error) {
	args := tfa.args
	if len(args) < 3 {
		return nil, fmt.Errorf("unexpected number of args: %d; expecting at least 3 args", len(args))
	}
	dstLabel, err := getString(args[0], 0)
	if err != nil {
		return nil, fmt.Errorf("cannot obtain dstLabel: %w", err)
	}
	phiArgs := args[1 : len(args)-1]
	tssOrig := args[len(args)-1]
	// Calculate quantile individually per each phi.
	var rvs []*timeseries
	for i, phiArg := range phiArgs {
		// Guard the phiArg[0].Values[0] access below; it panics on an empty phi arg otherwise.
		if len(phiArg) == 0 {
			return nil, fmt.Errorf("cannot obtain phi from arg #%d: it contains no time series", i+2)
		}
		if len(phiArg[0].Values) == 0 {
			// There are no points to calculate quantiles for.
			return nil, nil
		}
		phiStr := fmt.Sprintf("%g", phiArg[0].Values[0])
		// Pass a copy of the buckets, so the original ones remain available for the next phi.
		tss := copyTimeseries(tssOrig)
		tfaTmp := &transformFuncArg{
			ec: tfa.ec,
			fe: tfa.fe,
			args: [][]*timeseries{
				phiArg,
				tss,
			},
		}
		tssTmp, err := transformHistogramQuantile(tfaTmp)
		if err != nil {
			return nil, fmt.Errorf("cannot calculate quantile %s: %w", phiStr, err)
		}
		for _, ts := range tssTmp {
			// Drop the label first, so a pre-existing dstLabel isn't duplicated.
			ts.MetricName.RemoveTag(dstLabel)
			ts.MetricName.AddTag(dstLabel, phiStr)
		}
		rvs = append(rvs, tssTmp...)
	}
	return rvs, nil
}
func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 2 || len(args) > 3 {
@@ -1132,28 +1188,31 @@ func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
if err != nil {
return nil, err
}
if len(phis) == 0 {
return nil, nil
phi := float64(0)
if len(phis) > 0 {
phi = phis[0]
}
phi := phis[0]
rvs := args[1]
hf := histogram.GetFast()
a := getFloat64s()
values := a.A[:0]
for _, ts := range rvs {
hf.Reset()
lastIdx := -1
values := ts.Values
for i, v := range values {
originValues := ts.Values
values = values[:0]
for i, v := range originValues {
if math.IsNaN(v) {
continue
}
hf.Update(v)
values = append(values, v)
lastIdx = i
}
if lastIdx >= 0 {
values[lastIdx] = hf.Quantile(phi)
sort.Float64s(values)
originValues[lastIdx] = quantileSorted(phi, values)
}
}
histogram.PutFast(hf)
a.A = values
putFloat64s(a)
setLastValues(rvs)
return rvs, nil
}
@@ -1680,6 +1739,39 @@ func transformLabelMismatch(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
// transformLabelGraphiteGroup replaces the metric name of every series in args[0]
// with the dot-separated groups selected by the group IDs passed in args[1:].
//
// The metric name is split by dots into groups; groups are referenced by zero-based index.
// IDs outside the [0..len(groups)) range contribute an empty group, while the separating dot
// is still emitted, so the number of dots in the result depends only on the number of IDs.
//
// An error is returned if less than 2 args are passed or if a group ID arg isn't an integer number.
func transformLabelGraphiteGroup(tfa *transformFuncArg) ([]*timeseries, error) {
	args := tfa.args
	if len(args) < 2 {
		return nil, fmt.Errorf("unexpected number of args: %d; want at least 2 args", len(args))
	}
	tss := args[0]
	groupArgs := args[1:]
	groupIDs := make([]int, len(groupArgs))
	for i, arg := range groupArgs {
		groupID, err := getIntNumber(arg, i+1)
		if err != nil {
			return nil, fmt.Errorf("cannot get group name from arg #%d: %w", i+1, err)
		}
		groupIDs[i] = groupID
	}
	for _, ts := range tss {
		groups := bytes.Split(ts.MetricName.MetricGroup, dotSeparator)
		// Build the new name in a separate buffer. The items in groups alias MetricGroup,
		// so writing the result into MetricGroup[:0] would overwrite groups which are still
		// to be read when group IDs are reordered or repeated
		// (e.g. `foo.bar.baz` with IDs 2, 0 would yield `baz.baz` instead of `baz.foo`).
		groupName := make([]byte, 0, len(ts.MetricName.MetricGroup))
		for j, groupID := range groupIDs {
			if groupID >= 0 && groupID < len(groups) {
				groupName = append(groupName, groups[groupID]...)
			}
			if j < len(groupIDs)-1 {
				groupName = append(groupName, '.')
			}
		}
		ts.MetricName.MetricGroup = groupName
	}
	return tss, nil
}
// dotSeparator is the delimiter used for splitting metric names into Graphite-style groups.
var dotSeparator = []byte(".")
// transformLn returns the natural logarithm of v.
func transformLn(v float64) float64 {
return math.Log(v)
}
@@ -1734,7 +1826,7 @@ func transformRound(tfa *transformFuncArg) ([]*timeseries, error) {
return doTransformValues(args[0], tf, tfa.fe)
}
func transformSign(tfa *transformFuncArg) ([]*timeseries, error) {
func transformSgn(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
@@ -1855,18 +1947,58 @@ func transformSin(v float64) float64 {
return math.Sin(v)
}
// transformSinh returns the hyperbolic sine of v.
func transformSinh(v float64) float64 {
return math.Sinh(v)
}
// transformCos returns the cosine of v, where v is in radians.
func transformCos(v float64) float64 {
return math.Cos(v)
}
// transformCosh returns the hyperbolic cosine of v.
func transformCosh(v float64) float64 {
return math.Cosh(v)
}
// transformTan returns the tangent of v, where v is in radians.
func transformTan(v float64) float64 {
return math.Tan(v)
}
// transformTanh returns the hyperbolic tangent of v.
func transformTanh(v float64) float64 {
return math.Tanh(v)
}
// transformAsin returns the arcsine of v in radians.
func transformAsin(v float64) float64 {
return math.Asin(v)
}
// transformAsinh returns the inverse hyperbolic sine of v.
func transformAsinh(v float64) float64 {
return math.Asinh(v)
}
// transformAtan returns the arctangent of v in radians.
func transformAtan(v float64) float64 {
return math.Atan(v)
}
// transformAtanh returns the inverse hyperbolic tangent of v.
func transformAtanh(v float64) float64 {
return math.Atanh(v)
}
// transformAcos returns the arccosine of v in radians.
func transformAcos(v float64) float64 {
return math.Acos(v)
}
// transformAcosh returns the inverse hyperbolic cosine of v.
func transformAcosh(v float64) float64 {
return math.Acosh(v)
}
// transformDeg converts v from radians to degrees.
func transformDeg(v float64) float64 {
return v * 180 / math.Pi
}
// transformRad converts v from degrees to radians.
func transformRad(v float64) float64 {
return v * math.Pi / 180
}
func newTransformRand(newRandFunc func(r *rand.Rand) func() float64) transformFunc {
return func(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
@@ -1879,7 +2011,9 @@ func newTransformRand(newRandFunc func(r *rand.Rand) func() float64) transformFu
if err != nil {
return nil, err
}
seed = int64(tmp[0])
if len(tmp) > 0 {
seed = int64(tmp[0])
}
} else {
seed = time.Now().UnixNano()
}
@@ -1914,6 +2048,37 @@ func transformPi(tfa *transformFuncArg) ([]*timeseries, error) {
return evalNumber(tfa.ec, math.Pi), nil
}
// bitmapAnd returns the bitwise AND of a and b.
func bitmapAnd(a, b uint64) uint64 {
return a & b
}
// bitmapOr returns the bitwise OR of a and b.
func bitmapOr(a, b uint64) uint64 {
return a | b
}
// bitmapXor returns the bitwise XOR of a and b.
func bitmapXor(a, b uint64) uint64 {
return a ^ b
}
// newTransformBitmap returns a transform func, which applies bitmapFunc to every value
// of the series passed in the first arg and to the corresponding value of the scalar
// passed in the second arg.
//
// Both operands are converted to uint64 before bitmapFunc is called, and the result
// is converted back to float64.
//
// NaN operands produce NaN results, since the float64->uint64 conversion of NaN
// is implementation-dependent in Go and would turn gaps into arbitrary numbers.
// NOTE(review): negative values and values exceeding the uint64 range are still converted
// in implementation-dependent way - confirm whether they must be handled explicitly.
//
// The returned func fails if the number of args isn't 2 or if the second arg isn't a scalar.
func newTransformBitmap(bitmapFunc func(a, b uint64) uint64) func(tfa *transformFuncArg) ([]*timeseries, error) {
	return func(tfa *transformFuncArg) ([]*timeseries, error) {
		args := tfa.args
		if err := expectTransformArgsNum(args, 2); err != nil {
			return nil, err
		}
		ns, err := getScalar(args[1], 1)
		if err != nil {
			return nil, err
		}
		tf := func(values []float64) {
			for i, v := range values {
				n := ns[i]
				if math.IsNaN(v) || math.IsNaN(n) {
					// Preserve gaps instead of converting NaN to an undefined integer.
					values[i] = math.NaN()
					continue
				}
				values[i] = float64(bitmapFunc(uint64(v), uint64(n)))
			}
		}
		return doTransformValues(args[0], tf, tfa.fe)
	}
}
func transformTimezoneOffset(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
@@ -1982,6 +2147,17 @@ func transformEnd(tfa *transformFuncArg) float64 {
return float64(tfa.ec.End) / 1e3
}
// copyTimeseries returns a new slice with a newly allocated copy of every time series from tss.
//
// Every copy is filled via CopyFromShallowTimestamps, so the timeseries structs
// in the result are distinct from the structs referenced by tss.
// NOTE(review): presumably the Timestamps are shared with the source series - confirm
// against CopyFromShallowTimestamps.
func copyTimeseries(tss []*timeseries) []*timeseries {
	copies := make([]*timeseries, 0, len(tss))
	for _, orig := range tss {
		dup := new(timeseries)
		dup.CopyFromShallowTimestamps(orig)
		copies = append(copies, dup)
	}
	return copies
}
// copyTimeseriesMetricNames returns a copy of tss with real copy of MetricNames,
// but with shallow copy of Timestamps and Values if makeCopy is set.
//
@@ -1999,7 +2175,7 @@ func copyTimeseriesMetricNames(tss []*timeseries, makeCopy bool) []*timeseries {
return rvs
}
// copyShallow returns a copy of arg with shallow copies of MetricNames,
// copyTimeseriesShallow returns a copy of arg with shallow copies of MetricNames,
// Timestamps and Values.
func copyTimeseriesShallow(arg []*timeseries) []*timeseries {
rvs := make([]*timeseries, len(arg))

View File

@@ -1,17 +1,19 @@
{
"files": {
"main.css": "./static/css/main.6452b577.chunk.css",
"main.js": "./static/js/main.e5416b79.chunk.js",
"runtime-main.js": "./static/js/runtime-main.0270250c.js",
"static/js/2.63374ed0.chunk.js": "./static/js/2.63374ed0.chunk.js",
"static/js/3.a5d02d16.chunk.js": "./static/js/3.a5d02d16.chunk.js",
"main.css": "./static/css/main.674f8c98.chunk.css",
"main.js": "./static/js/main.9d24c3b2.chunk.js",
"runtime-main.js": "./static/js/runtime-main.c0002ac8.js",
"static/css/2.81b2a0ac.chunk.css": "./static/css/2.81b2a0ac.chunk.css",
"static/js/2.8fa069e1.chunk.js": "./static/js/2.8fa069e1.chunk.js",
"static/js/3.0dc73915.chunk.js": "./static/js/3.0dc73915.chunk.js",
"index.html": "./index.html",
"static/js/2.63374ed0.chunk.js.LICENSE.txt": "./static/js/2.63374ed0.chunk.js.LICENSE.txt"
"static/js/2.8fa069e1.chunk.js.LICENSE.txt": "./static/js/2.8fa069e1.chunk.js.LICENSE.txt"
},
"entrypoints": [
"static/js/runtime-main.0270250c.js",
"static/js/2.63374ed0.chunk.js",
"static/css/main.6452b577.chunk.css",
"static/js/main.e5416b79.chunk.js"
"static/js/runtime-main.c0002ac8.js",
"static/css/2.81b2a0ac.chunk.css",
"static/js/2.8fa069e1.chunk.js",
"static/css/main.674f8c98.chunk.css",
"static/js/main.9d24c3b2.chunk.js"
]
}

View File

@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="VM-UI is a metric explorer for Victoria Metrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap"/><link href="./static/css/main.6452b577.chunk.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><script>!function(e){function r(r){for(var n,i,a=r[0],c=r[1],l=r[2],s=0,p=[];s<a.length;s++)i=a[s],Object.prototype.hasOwnProperty.call(o,i)&&o[i]&&p.push(o[i][0]),o[i]=0;for(n in c)Object.prototype.hasOwnProperty.call(c,n)&&(e[n]=c[n]);for(f&&f(r);p.length;)p.shift()();return u.push.apply(u,l||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,a=1;a<t.length;a++){var c=t[a];0!==o[c]&&(n=!1)}n&&(u.splice(r--,1),e=i(i.s=t[0]))}return e}var n={},o={1:0},u=[];function i(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,i),t.l=!0,t.exports}i.e=function(e){var r=[],t=o[e];if(0!==t)if(t)r.push(t[2]);else{var n=new Promise((function(r,n){t=o[e]=[r,n]}));r.push(t[2]=n);var u,a=document.createElement("script");a.charset="utf-8",a.timeout=120,i.nc&&a.setAttribute("nonce",i.nc),a.src=function(e){return i.p+"static/js/"+({}[e]||e)+"."+{3:"a5d02d16"}[e]+".chunk.js"}(e);var c=new Error;u=function(r){a.onerror=a.onload=null,clearTimeout(l);var t=o[e];if(0!==t){if(t){var n=r&&("load"===r.type?"missing":r.type),u=r&&r.target&&r.target.src;c.message="Loading chunk "+e+" failed.\n("+n+": "+u+")",c.name="ChunkLoadError",c.type=n,c.request=u,t[1](c)}o[e]=void 
0}};var l=setTimeout((function(){u({type:"timeout",target:a})}),12e4);a.onerror=a.onload=u,document.head.appendChild(a)}return Promise.all(r)},i.m=e,i.c=n,i.d=function(e,r,t){i.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},i.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.t=function(e,r){if(1&r&&(e=i(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(i.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)i.d(t,n,function(r){return e[r]}.bind(null,n));return t},i.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return i.d(r,"a",r),r},i.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},i.p="./",i.oe=function(e){throw console.error(e),e};var a=this.webpackJsonpvmui=this.webpackJsonpvmui||[],c=a.push.bind(a);a.push=r,a=a.slice();for(var l=0;l<a.length;l++)r(a[l]);var f=c;t()}([])</script><script src="./static/js/2.63374ed0.chunk.js"></script><script src="./static/js/main.e5416b79.chunk.js"></script></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="VM-UI is a metric explorer for Victoria Metrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap"/><link href="./static/css/2.81b2a0ac.chunk.css" rel="stylesheet"><link href="./static/css/main.674f8c98.chunk.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><script>!function(e){function r(r){for(var n,i,a=r[0],c=r[1],l=r[2],s=0,p=[];s<a.length;s++)i=a[s],Object.prototype.hasOwnProperty.call(o,i)&&o[i]&&p.push(o[i][0]),o[i]=0;for(n in c)Object.prototype.hasOwnProperty.call(c,n)&&(e[n]=c[n]);for(f&&f(r);p.length;)p.shift()();return u.push.apply(u,l||[]),t()}function t(){for(var e,r=0;r<u.length;r++){for(var t=u[r],n=!0,a=1;a<t.length;a++){var c=t[a];0!==o[c]&&(n=!1)}n&&(u.splice(r--,1),e=i(i.s=t[0]))}return e}var n={},o={1:0},u=[];function i(r){if(n[r])return n[r].exports;var t=n[r]={i:r,l:!1,exports:{}};return e[r].call(t.exports,t,t.exports,i),t.l=!0,t.exports}i.e=function(e){var r=[],t=o[e];if(0!==t)if(t)r.push(t[2]);else{var n=new Promise((function(r,n){t=o[e]=[r,n]}));r.push(t[2]=n);var u,a=document.createElement("script");a.charset="utf-8",a.timeout=120,i.nc&&a.setAttribute("nonce",i.nc),a.src=function(e){return i.p+"static/js/"+({}[e]||e)+"."+{3:"0dc73915"}[e]+".chunk.js"}(e);var c=new Error;u=function(r){a.onerror=a.onload=null,clearTimeout(l);var t=o[e];if(0!==t){if(t){var n=r&&("load"===r.type?"missing":r.type),u=r&&r.target&&r.target.src;c.message="Loading chunk "+e+" failed.\n("+n+": 
"+u+")",c.name="ChunkLoadError",c.type=n,c.request=u,t[1](c)}o[e]=void 0}};var l=setTimeout((function(){u({type:"timeout",target:a})}),12e4);a.onerror=a.onload=u,document.head.appendChild(a)}return Promise.all(r)},i.m=e,i.c=n,i.d=function(e,r,t){i.o(e,r)||Object.defineProperty(e,r,{enumerable:!0,get:t})},i.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.t=function(e,r){if(1&r&&(e=i(e)),8&r)return e;if(4&r&&"object"==typeof e&&e&&e.__esModule)return e;var t=Object.create(null);if(i.r(t),Object.defineProperty(t,"default",{enumerable:!0,value:e}),2&r&&"string"!=typeof e)for(var n in e)i.d(t,n,function(r){return e[r]}.bind(null,n));return t},i.n=function(e){var r=e&&e.__esModule?function(){return e.default}:function(){return e};return i.d(r,"a",r),r},i.o=function(e,r){return Object.prototype.hasOwnProperty.call(e,r)},i.p="./",i.oe=function(e){throw console.error(e),e};var a=this.webpackJsonpvmui=this.webpackJsonpvmui||[],c=a.push.bind(a);a.push=r,a=a.slice();for(var l=0;l<a.length;l++)r(a[l]);var f=c;t()}([])</script><script src="./static/js/2.8fa069e1.chunk.js"></script><script src="./static/js/main.9d24c3b2.chunk.js"></script></body></html>

Some files were not shown because too many files have changed in this diff Show More