mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-08 03:14:09 +03:00
Compare commits
226 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed00eb3f33 | ||
|
|
7615a3ab8d | ||
|
|
7be9bedaf9 | ||
|
|
00b1659dde | ||
|
|
528e25bdde | ||
|
|
b3849a90fd | ||
|
|
7d89fafe1a | ||
|
|
cd96248480 | ||
|
|
7554be172d | ||
|
|
4beab7ad39 | ||
|
|
41d23f84ed | ||
|
|
184670fb9b | ||
|
|
52791fd1c0 | ||
|
|
576da0fe46 | ||
|
|
215967437d | ||
|
|
d1ad3adcbe | ||
|
|
42960feff4 | ||
|
|
07246bc31c | ||
|
|
e646674b23 | ||
|
|
4628deecd1 | ||
|
|
eead3ee8ec | ||
|
|
c402265e88 | ||
|
|
ff495a74f6 | ||
|
|
45962fb8c2 | ||
|
|
fd6c690276 | ||
|
|
e730788477 | ||
|
|
ef7e2af8f5 | ||
|
|
15aa6142ef | ||
|
|
5492edcc6c | ||
|
|
e969ef2639 | ||
|
|
c098988a18 | ||
|
|
1bdfa29ef7 | ||
|
|
8adba82c02 | ||
|
|
8d9eb5f808 | ||
|
|
582c74cd93 | ||
|
|
f3d33e23c9 | ||
|
|
455bf50a91 | ||
|
|
2791008e19 | ||
|
|
a499de45cc | ||
|
|
23c9e6b727 | ||
|
|
9d32fb1d9e | ||
|
|
d4b6d22987 | ||
|
|
0be5b09fb4 | ||
|
|
81746d14b9 | ||
|
|
807c2b076c | ||
|
|
84fd8af6d3 | ||
|
|
9043a509a3 | ||
|
|
1ad3de5c54 | ||
|
|
d60908bba4 | ||
|
|
716754fae6 | ||
|
|
bb61a4769b | ||
|
|
ac45082216 | ||
|
|
e5202a4eae | ||
|
|
68e4f40a72 | ||
|
|
ada2ae69ec | ||
|
|
bc8381613d | ||
|
|
8e44fba76d | ||
|
|
7dbe335426 | ||
|
|
3f85c06b65 | ||
|
|
d20c2156e4 | ||
|
|
ad730d8a17 | ||
|
|
dbbdfbe7ee | ||
|
|
639b26b40c | ||
|
|
8f16388428 | ||
|
|
aaa497ff0b | ||
|
|
ef94333808 | ||
|
|
c25b0c2cd5 | ||
|
|
5d0c37bec0 | ||
|
|
bba1442649 | ||
|
|
a9ffd233df | ||
|
|
a034f02fb2 | ||
|
|
e6eee2bebf | ||
|
|
509d12643b | ||
|
|
5e71fab8a6 | ||
|
|
d01f3c1943 | ||
|
|
3f498cf2dc | ||
|
|
8c8c14c127 | ||
|
|
44a86e1be3 | ||
|
|
f0c678c41b | ||
|
|
e255c066cc | ||
|
|
e7959094f6 | ||
|
|
922d9aadf2 | ||
|
|
68716488db | ||
|
|
67a64c142d | ||
|
|
328b52e5ff | ||
|
|
700737c181 | ||
|
|
2f735f112d | ||
|
|
1ca0c8a29b | ||
|
|
d81d586b86 | ||
|
|
0f63da3698 | ||
|
|
62ed38c6f0 | ||
|
|
79c30cf4cb | ||
|
|
2f1e7298ce | ||
|
|
0da202023b | ||
|
|
48d0ec1363 | ||
|
|
a1a065a47e | ||
|
|
0516e3f330 | ||
|
|
5b81bdde39 | ||
|
|
865610a7c8 | ||
|
|
cb8c6908dc | ||
|
|
894dcb7c1c | ||
|
|
215eba0b82 | ||
|
|
edb1eca6f1 | ||
|
|
97b6f5d223 | ||
|
|
a090627059 | ||
|
|
53c87ba341 | ||
|
|
bb161497cf | ||
|
|
994fa2f3bf | ||
|
|
e151c5c644 | ||
|
|
3107c633e3 | ||
|
|
3e557c9861 | ||
|
|
54ef2d8112 | ||
|
|
b1f6843bd0 | ||
|
|
039c9d2441 | ||
|
|
2a45871823 | ||
|
|
461481fbdf | ||
|
|
4c8b49b193 | ||
|
|
e79de9774b | ||
|
|
34563916f7 | ||
|
|
9257eee982 | ||
|
|
6f05c4d351 | ||
|
|
2f612e0c67 | ||
|
|
61c611f5ad | ||
|
|
9224ede54f | ||
|
|
228d137936 | ||
|
|
e4303d3d21 | ||
|
|
ad8d3b387d | ||
|
|
62e76ca805 | ||
|
|
4f526cc816 | ||
|
|
dfb113f175 | ||
|
|
31ae5911a8 | ||
|
|
d3442b40b2 | ||
|
|
caa2952aa6 | ||
|
|
e00cfc854d | ||
|
|
b9c8f6bf34 | ||
|
|
ad6290953c | ||
|
|
efcbb51968 | ||
|
|
ed0df37ee7 | ||
|
|
004d2924e2 | ||
|
|
11be704109 | ||
|
|
5a4675c528 | ||
|
|
ecb1b2564a | ||
|
|
b35cb293f5 | ||
|
|
1c641037e8 | ||
|
|
6b5ad535ae | ||
|
|
8949d65ad1 | ||
|
|
3198fd31fa | ||
|
|
aa5d88055d | ||
|
|
df01836818 | ||
|
|
dfa156e6aa | ||
|
|
8c14ca93fa | ||
|
|
e4e1cd1de2 | ||
|
|
ef6ee72108 | ||
|
|
ed7580ad22 | ||
|
|
9eb71dda3d | ||
|
|
328814ee60 | ||
|
|
7398e5701b | ||
|
|
4e770e9120 | ||
|
|
b442a42d8e | ||
|
|
6d77bfae4f | ||
|
|
4081e2295e | ||
|
|
e1107fec10 | ||
|
|
25f80d320b | ||
|
|
cde18d1f43 | ||
|
|
457e61900d | ||
|
|
7e347972c4 | ||
|
|
19dd121968 | ||
|
|
829ec4f9cf | ||
|
|
55d83e777d | ||
|
|
1033dc7e2a | ||
|
|
619b0a25c9 | ||
|
|
666c795b98 | ||
|
|
a730b3f6a1 | ||
|
|
508ad46e0e | ||
|
|
e5b9f47623 | ||
|
|
ca74b80f10 | ||
|
|
cba820e390 | ||
|
|
6fe3c48a6e | ||
|
|
9c350bc20d | ||
|
|
256fd9a87e | ||
|
|
2d9b3ad5b3 | ||
|
|
b66c7c13ac | ||
|
|
3e1d7d8489 | ||
|
|
47c7ea5c60 | ||
|
|
4f737d1cbd | ||
|
|
742da690f4 | ||
|
|
99f54e44ff | ||
|
|
cb92113632 | ||
|
|
e7557e0252 | ||
|
|
e59b9916aa | ||
|
|
d0b694c5c8 | ||
|
|
eb45185eef | ||
|
|
32b9fb58b8 | ||
|
|
12b16077c4 | ||
|
|
a23806f486 | ||
|
|
6daa5f7500 | ||
|
|
703def4b2e | ||
|
|
de137aef98 | ||
|
|
acf828a759 | ||
|
|
8bb762124a | ||
|
|
ff6a0955eb | ||
|
|
8b133e40d5 | ||
|
|
44a54b8b3d | ||
|
|
d59cdbe90c | ||
|
|
0b2086b7a5 | ||
|
|
8f628cd805 | ||
|
|
91b3482894 | ||
|
|
e5500bfcf2 | ||
|
|
5d3db3ff7c | ||
|
|
4dd3de9286 | ||
|
|
8da3f773ae | ||
|
|
9d5f5b6878 | ||
|
|
9a2ba5b6d1 | ||
|
|
b277ba8121 | ||
|
|
84a37098ed | ||
|
|
56ccfa5218 | ||
|
|
7c2c8b2981 | ||
|
|
d5dddb0953 | ||
|
|
586c5be404 | ||
|
|
1cd01b5359 | ||
|
|
88538df267 | ||
|
|
63e5ee0d29 | ||
|
|
eba4e92994 | ||
|
|
82ecfa3b32 | ||
|
|
dc4e3f0e0b | ||
|
|
8f2e88234f |
3
Makefile
3
Makefile
@@ -133,6 +133,9 @@ app-local:
|
||||
app-local-pure:
|
||||
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-local-with-goarch:
|
||||
GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-$(GOARCH)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc
|
||||
|
||||
|
||||
109
README.md
109
README.md
@@ -79,6 +79,7 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
|
||||
* [HTTP OpenTSDB /api/put requests](#sending-opentsdb-data-via-http-apiput-requests) if `-opentsdbHTTPListenAddr` is set.
|
||||
* [/api/v1/import](#how-to-import-time-series-data).
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* Supports metrics' relabeling. See [these docs](#relabeling) for details.
|
||||
* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
|
||||
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* See also technical [Articles about VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Articles).
|
||||
@@ -98,6 +99,8 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
|
||||
* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
|
||||
* [Querying Graphite data](#querying-graphite-data)
|
||||
* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
|
||||
* [How to import data in Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format)
|
||||
* [How to import CSV data](#how-to-import-csv-data)
|
||||
* [Prometheus querying API usage](#prometheus-querying-api-usage)
|
||||
* [How to build from sources](#how-to-build-from-sources)
|
||||
* [Development build](#development-build)
|
||||
@@ -111,6 +114,7 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
|
||||
* [How to delete time series](#how-to-delete-time-series)
|
||||
* [How to export time series](#how-to-export-time-series)
|
||||
* [How to import time series data](#how-to-import-time-series-data)
|
||||
* [Relabeling](#relabeling)
|
||||
* [Federation](#federation)
|
||||
* [Capacity planning](#capacity-planning)
|
||||
* [High availability](#high-availability)
|
||||
@@ -126,6 +130,7 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
|
||||
* [Monitoring](#monitoring)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
* [Backfilling](#backfilling)
|
||||
* [Data updates](#data-updates)
|
||||
* [Replication](#replication)
|
||||
* [Backups](#backups)
|
||||
* [Profiling](#profiling)
|
||||
@@ -134,7 +139,6 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
|
||||
* [Contacts](#contacts)
|
||||
* [Community and contributions](#community-and-contributions)
|
||||
* [Reporting bugs](#reporting-bugs)
|
||||
* [Roadmap](#roadmap)
|
||||
* [Victoria Metrics Logo](#victoria-metrics-logo)
|
||||
* [Logo Usage Guidelines](#logo-usage-guidelines)
|
||||
* [Font used](#font-used)
|
||||
@@ -243,10 +247,12 @@ VictoriaMetrics supports native PromQL and [extends it with useful features](htt
|
||||
### How to upgrade VictoriaMetrics
|
||||
|
||||
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
say otherwise. It is recommended performing regular upgrades to the latest version,
|
||||
since it may contain important bug fixes, performance optimizations or new features.
|
||||
say otherwise. It is safe skipping multiple versions during the upgrade unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
|
||||
It is recommended performing regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
|
||||
|
||||
Follow the following steps during the upgrade:
|
||||
It is also safe downgrading to the previous version unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
|
||||
|
||||
The following steps must be performed during the upgrade / downgrade:
|
||||
|
||||
1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it.
|
||||
2) Wait until the process stops. This can take a few seconds.
|
||||
@@ -282,6 +288,10 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
|
||||
|
||||
In the future other `*_sd_config` types will be supported.
|
||||
|
||||
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
|
||||
VictoriaMetrics also supports [importing data in Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format).
|
||||
|
||||
See also [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md), which can be used as drop-in replacement for Prometheus.
|
||||
|
||||
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)
|
||||
@@ -305,7 +315,8 @@ VictoriaMetrics maps Influx data using the following rules:
|
||||
unless `db` tag exists in the Influx line.
|
||||
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
|
||||
where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag.
|
||||
See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty, then time series names correspond to field names.
|
||||
See also `-influxSkipSingleField` command-line flag.
|
||||
If `{measurement}` is empty or `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names.
|
||||
* Field values are mapped to time series values.
|
||||
* Tags are mapped to Prometheus labels as-is.
|
||||
|
||||
@@ -388,6 +399,7 @@ or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/mas
|
||||
|
||||
VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
|
||||
and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data.
|
||||
The same protocol is used for [ingesting data in KairosDB](https://kairosdb.github.io/docs/build/html/PushingData.html).
|
||||
|
||||
#### Sending data via `telnet put` protocol
|
||||
|
||||
@@ -507,6 +519,32 @@ The following response should be returned:
|
||||
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
|
||||
|
||||
|
||||
### How to import data in Prometheus exposition format
|
||||
|
||||
VictoriaMetrics accepts data in [Prometheus exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
|
||||
via `/api/v1/import/prometheus` path. For example, the following line imports a single line in Prometheus exposition format into VictoriaMetrics:
|
||||
|
||||
```bash
|
||||
curl -d 'foo{bar="baz"} 123' -X POST 'http://localhost:8428/api/v1/import/prometheus'
|
||||
```
|
||||
|
||||
The following command may be used for verifying the imported data:
|
||||
|
||||
```bash
|
||||
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__=~"foo"}'
|
||||
```
|
||||
|
||||
It should return something like the following:
|
||||
|
||||
```
|
||||
{"metric":{"__name__":"foo","bar":"baz"},"values":[123],"timestamps":[1594370496905]}
|
||||
```
|
||||
|
||||
VictoriaMetrics accepts arbitrary number of lines in a single request to `/api/v1/import/prometheus`, i.e. it supports data streaming.
|
||||
|
||||
VictoriaMetrics also may scrape Prometheus targets - see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
|
||||
|
||||
### Prometheus querying API usage
|
||||
|
||||
VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
|
||||
@@ -531,6 +569,7 @@ Additionally VictoriaMetrics provides the following handlers:
|
||||
* `/api/v1/series/count` - it returns the total number of time series in the database. Note that this handler scans all the inverted index,
|
||||
so it can be slow if the database contains tens of millions of time series.
|
||||
* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
|
||||
* `/api/v1/status/active_queries` - it returns a list of currently running queries.
|
||||
|
||||
### How to build from sources
|
||||
|
||||
@@ -583,8 +622,9 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
|
||||
|
||||
By default the image is built on top of `alpine` image for improved debuggability. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `scratch` image:
|
||||
By default the image is built on top of [alpine](https://hub.docker.com/_/alpine) image for improved debuggability.
|
||||
It is possible to build the package on top of any other base image by setting it via `<ROOT_IMAGE>` environment variable.
|
||||
For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=scratch make package-victoria-metrics
|
||||
@@ -593,7 +633,7 @@ ROOT_IMAGE=scratch make package-victoria-metrics
|
||||
### Start with docker-compose
|
||||
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
|
||||
helps to spin up VictoriaMetrics, Prometheus and Grafana with one command.
|
||||
helps to spin up VictoriaMetrics, [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) and Grafana with one command.
|
||||
More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#folder-contains-basic-images-and-tools-for-building-and-running-victoria-metrics-in-docker).
|
||||
|
||||
### Setting up service
|
||||
@@ -649,7 +689,7 @@ The delete API is intended mainly for the following cases:
|
||||
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
|
||||
|
||||
* Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
|
||||
This can be done with relabeling in [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
|
||||
This can be done with [relabeling](#relabeling).
|
||||
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
|
||||
* Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
|
||||
time series occupy disk space until the next merge operation, which can never occur when deleting too old data.
|
||||
@@ -697,6 +737,7 @@ Time series data can be imported via any supported ingestion protocol:
|
||||
* [OpenTSDB http /api/put](#sending-opentsdb-data-via-http-apiput-requests)
|
||||
* `/api/v1/import` http POST handler, which accepts data from [/api/v1/export](#how-to-export-time-series).
|
||||
* `/api/v1/import/csv` http POST handler, which accepts CSV data. See [these docs](#how-to-import-csv-data) for details.
|
||||
* `/api/v1/import/prometheus` http POST handler, which accepts data in Prometheus exposition format. See [these docs](#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
|
||||
The most efficient protocol for importing data into VictoriaMetrics is `/api/v1/import`. Example for importing data obtained via `/api/v1/export`:
|
||||
|
||||
@@ -723,6 +764,22 @@ Note that it could be required to flush response cache after importing historica
|
||||
Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
|
||||
and importing them concurrently. Note that the original file must be split on newlines.
|
||||
|
||||
|
||||
### Relabeling
|
||||
|
||||
VictoriaMetrics supports Prometheus-compatible relabeling for all the ingested metrics if `-relabelConfig` command-line flag points
|
||||
to a file containing a list of [relabel_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) entries.
|
||||
|
||||
VictoriaMetrics provides the following extra actions for relabeling rules:
|
||||
|
||||
* `replace_all`: replaces all the occurrences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
|
||||
* `labelmap_all`: replaces all the occurrences of `regex` in all the label names with the `replacement`.
|
||||
* `keep_if_equal`: keeps the entry if all label values from `source_labels` are equal.
|
||||
* `drop_if_equal`: drops the entry if all the label values from `source_labels` are equal.
|
||||
|
||||
See also [relabeling in vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#relabeling).
|
||||
|
||||
|
||||
### Federation
|
||||
|
||||
VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
|
||||
@@ -742,7 +799,7 @@ A rough estimation of the required resources for ingestion path:
|
||||
Time series is considered active if new data points have been added to it recently or if it has been recently queried.
|
||||
The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric
|
||||
exported on the `/metrics` page.
|
||||
VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag.
|
||||
VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited with `-memory.allowedPercent` or `-memory.allowedBytes` flags.
|
||||
|
||||
* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
|
||||
the insert stream of 1M data points per second. The ingestion rate may be lower for high cardinality data or for time series with high number of labels.
|
||||
@@ -768,6 +825,8 @@ The required resources for query path:
|
||||
The higher number of scanned time series and lower `step` argument results in the higher RAM usage.
|
||||
|
||||
* CPU cores: a CPU core per 30 millions of scanned data points per second.
|
||||
This means that heavy queries that touch a big number of time series (over 10K) and/or a big number of data points (over 100M)
|
||||
usually require more CPU resources than tiny queries that touch a few time series with small number of data points.
|
||||
|
||||
* Network usage: depends on the frequency and the type of incoming requests. Typical Grafana dashboards usually
|
||||
require negligible network bandwidth.
|
||||
@@ -949,6 +1008,8 @@ The most interesting metrics are:
|
||||
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||
of the current number of active time series.
|
||||
|
||||
VictoriaMetrics also exposes currently running queries with their execution times at `/api/v1/status/active_queries` page.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
@@ -956,7 +1017,9 @@ The most interesting metrics are:
|
||||
of tweaking these flag values arises.
|
||||
|
||||
* It is recommended upgrading to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
since the issue could be already fixed there.
|
||||
since the encountered issue could be already fixed there.
|
||||
|
||||
* It is recommended inspecting logs during troubleshooting, since they may contain useful information.
|
||||
|
||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||
then it is likely you have too many active time series for the current amount of RAM.
|
||||
@@ -966,6 +1029,9 @@ The most interesting metrics are:
|
||||
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
||||
option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
|
||||
|
||||
* VictoriaMetrics prioritizes data ingestion over data querying. So if it doesn't have enough resources for data ingestion,
|
||||
then data querying may slow down significantly.
|
||||
|
||||
* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
It may slow down when there is no enough free space left. So make sure `-storageDataPath` directory
|
||||
has at least 20% of free space comparing to disk size. The remaining amount of free space
|
||||
@@ -1014,6 +1080,14 @@ Yet another solution is to increase `-search.cacheTimestampOffset` flag value in
|
||||
for data with timestamps close to the current time.
|
||||
|
||||
|
||||
### Data updates
|
||||
|
||||
VictoriaMetrics doesn't support updating already existing sample values to new ones. It stores all the ingested data points
|
||||
for the same time series with identical timestamps. While it is possible to substitute old time series with new time series via
|
||||
[removal of old time series](#how-to-delete-time-series) and then [writing new time series](#backfilling), this approach
|
||||
should be used only for one-off updates. It shouldn't be used for frequent updates because of non-zero overhead related to data removal.
|
||||
|
||||
|
||||
### Replication
|
||||
|
||||
Single-node VictoriaMetrics doesn't support application-level replication. Use cluster version instead.
|
||||
@@ -1053,6 +1127,9 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
|
||||
|
||||
## Integrations
|
||||
|
||||
* [Helm charts for single-node and cluster versions of VictoriaMetrics](https://github.com/VictoriaMetrics/helm-charts).
|
||||
* [Kubernetes operator for VictoriaMetrics](https://github.com/VictoriaMetrics/operator).
|
||||
* [vmctl tool for data migration to VictoriaMetrics](https://github.com/VictoriaMetrics/vmctl).
|
||||
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
|
||||
@@ -1102,16 +1179,6 @@ Adhering `KISS` principle simplifies the resulting code and architecture, so it
|
||||
|
||||
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
|
||||
|
||||
## Roadmap
|
||||
|
||||
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
|
||||
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
|
||||
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
|
||||
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
|
||||
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
|
||||
|
||||
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
|
||||
|
||||
|
||||
## Victoria Metrics Logo
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
@@ -31,6 +32,7 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval)
|
||||
|
||||
@@ -373,7 +373,7 @@ func checkMetricsResult(got, want []Metric) error {
|
||||
want = removeIfFoundMetrics(r, want)
|
||||
}
|
||||
if len(want) > 0 {
|
||||
return fmt.Errorf("exptected metrics %+v not found in %+v", want, got)
|
||||
return fmt.Errorf("expected metrics %+v not found in %+v", want, got)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -85,7 +85,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
mr.Timestamp = currentTimestamp
|
||||
mr.Value = r.Value
|
||||
}
|
||||
logger.Infof("writing %d rows at timestamp %d", len(mrs), currentTimestamp)
|
||||
vmstorage.AddRows(mrs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
"empty_label_match 1 {TIME_S-1m}",
|
||||
"empty_label_match;foo=bar 2 {TIME_S-1m}",
|
||||
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S}&end={TIME_S}&step=60"],
|
||||
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S-1m}&end={TIME_S}&step=60"],
|
||||
"result_query_range": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"matrix",
|
||||
"result":[
|
||||
{"metric":{"__name__":"empty_label_match"},"values":[["{TIME_S}","1"]]},
|
||||
{"metric":{"__name__":"empty_label_match","foo":"bar"},"values":[["{TIME_S}","2"]]}
|
||||
{"metric":{"__name__":"empty_label_match"},"values":[["{TIME_S-1m}","1"],["{TIME_S}","1"]]},
|
||||
{"metric":{"__name__":"empty_label_match","foo":"bar"},"values":[["{TIME_S-1m}","2"],["{TIME_S}","2"]]}
|
||||
]}}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
["{TIME_S-120s}","3"],
|
||||
["{TIME_S-60s}","2"],
|
||||
["{TIME_S-30s}","1"],
|
||||
["{TIME_S-20s}","1"]
|
||||
["{TIME_S-20s}","1"],
|
||||
["{TIME_S-10s}","1"],
|
||||
["{TIME_S-0s}","1"]
|
||||
]}]}}
|
||||
}
|
||||
|
||||
@@ -59,19 +59,22 @@ run-vmagent:
|
||||
$(MAKE) run-via-docker
|
||||
|
||||
vmagent-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-amd64 ./app/vmagent
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm64 ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-ppc64le ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-386 ./app/vmagent
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmagent-local-with-goarch
|
||||
|
||||
vmagent-local-with-goarch:
|
||||
APP_NAME=vmagent $(MAKE) app-local-with-goarch
|
||||
|
||||
vmagent-pure:
|
||||
APP_NAME=vmagent $(MAKE) app-local-pure
|
||||
|
||||
@@ -26,6 +26,7 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
|
||||
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-opentsdb-compatible-agents).
|
||||
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
|
||||
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data).
|
||||
* Data in Prometheus exposition format. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
* Arbitrary CSV data via `http://<vmagent>:8429/api/v1/import/csv`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-csv-data).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems.
|
||||
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
@@ -147,12 +148,20 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
|
||||
`vmagent` also supports the following additional options in `scrape_config` section:
|
||||
|
||||
* `disable_compression: true` - for disabling response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets
|
||||
in order to save network bandwidth.
|
||||
* `disable_keepalive: true` - for disabling [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis.
|
||||
By default `vmagent` uses keep-alive connections to scrape targets in order to reduce overhead on connection re-establishing.
|
||||
|
||||
Note that `vmagent` doesn't support `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.
|
||||
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
|
||||
|
||||
### Adding labels to metrics
|
||||
@@ -193,12 +202,16 @@ Read more about relabeling in the following articles:
|
||||
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended to set up regular scraping of this page
|
||||
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
|
||||
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
|
||||
If you have suggestions or improvements, or have found a bug, feel free to open an issue on GitHub or add a review to the dashboard.
|
||||
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
* It is recommended [setting up the official Grafana dashboard](#monitoring) in order to monitor `vmagent` state.
|
||||
|
||||
* It is recommended to increase the maximum number of open files in the system (`ulimit -n`) when scraping a large number of targets,
|
||||
since `vmagent` establishes at least a single TCP connection per each target.
|
||||
|
||||
@@ -208,17 +221,35 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
|
||||
* If you see gaps on the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set, then try increasing `-remoteWrite.queues`.
|
||||
Such gaps may appear because `vmagent` cannot keep up with sending the collected data to remote storage, so it starts dropping the buffered data
|
||||
if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
|
||||
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports.
|
||||
Just add the following relabeling rule to `relabel_configs` section in order to filter out targets with unneeded ports:
|
||||
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen on multiple ports
|
||||
or they use an init container.
|
||||
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
```
|
||||
The following `relabel_configs` section may help determine which `__meta_*` labels result in duplicate targets:
|
||||
```yml
|
||||
- action: labelmap
|
||||
regex: __meta_(.*)
|
||||
```
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out pods with unneeded ports:
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
```
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out init container pods:
|
||||
```yml
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
```
|
||||
|
||||
|
||||
### How to build from sources
|
||||
@@ -244,13 +275,29 @@ Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
By default the image is built on top of [alpine](https://hub.docker.com/_/alpine) image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmagent
|
||||
ROOT_IMAGE=scratch make package-vmagent
|
||||
```
|
||||
|
||||
#### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent-arm` or `make vmagent-arm64` from the root folder of the repository.
|
||||
It builds `vmagent-arm` or `vmagent-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmagent-arm-prod` or `make vmagent-arm64-prod` from the root folder of the repository.
|
||||
It builds `vmagent-arm-prod` or `vmagent-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
### Profiling
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
var (
|
||||
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
|
||||
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -62,24 +63,27 @@ func insertRows(db string, rows []parser.Row) error {
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
commonLabels = commonLabels[:0]
|
||||
hasDBLabel := false
|
||||
hasDBKey := false
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
if tag.Key == "db" {
|
||||
hasDBLabel = true
|
||||
hasDBKey = true
|
||||
}
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
if len(db) > 0 && !hasDBLabel {
|
||||
if len(db) > 0 && !hasDBKey {
|
||||
commonLabels = append(commonLabels, prompbmarshal.Label{
|
||||
Name: "db",
|
||||
Value: db,
|
||||
})
|
||||
}
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
if !*skipMeasurement {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, r.Measurement...)
|
||||
}
|
||||
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
|
||||
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
|
||||
|
||||
@@ -13,10 +13,12 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/prometheusimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
|
||||
@@ -34,7 +36,8 @@ var (
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to `http://<vmagent>:8429/write`")
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
@@ -58,6 +61,7 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
|
||||
if *dryRun {
|
||||
if err := flag.Set("promscrape.config.strictParse", "true"); err != nil {
|
||||
@@ -135,7 +139,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -144,7 +148,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
vmimportRequests.Inc()
|
||||
if err := vmimport.InsertHandler(r); err != nil {
|
||||
vmimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -153,7 +157,16 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
csvimportRequests.Inc()
|
||||
if err := csvimport.InsertHandler(r); err != nil {
|
||||
csvimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/api/v1/import/prometheus":
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -162,7 +175,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
influxWriteRequests.Inc()
|
||||
if err := influx.InsertHandlerForHTTP(r); err != nil {
|
||||
influxWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -197,6 +210,9 @@ var (
|
||||
csvimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
csvimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
|
||||
prometheusimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
prometheusimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
|
||||
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/write", protocol="influx"}`)
|
||||
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/write", protocol="influx"}`)
|
||||
|
||||
|
||||
64
app/vmagent/prometheusimport/request_handler.go
Normal file
64
app/vmagent/prometheusimport/request_handler.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="prometheus"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="prometheus"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes `/api/v1/import/prometheus` request.
//
// The request body is passed to parser.ParseStream with insertRows as the
// row callback. The whole call runs inside writeconcurrencylimiter.Do, and
// the error returned by writeconcurrencylimiter.Do is returned to the caller.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
// The body is treated as gzip-compressed only when the Content-Encoding header is exactly "gzip".
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, insertRows)
|
||||
})
|
||||
}
|
||||
|
||||
// insertRows converts the parsed rows into prompbmarshal.TimeSeries entries
// and hands them to remotewrite.Push.
//
// Each row becomes one time series holding a single sample. The series labels
// are a `__name__` label set to r.Metric followed by one label per row tag.
// The function always returns nil.
func insertRows(rows []parser.Row) error {
|
||||
// The push context is obtained from common and returned to it on exit.
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
// Reuse the backing arrays held by ctx; the slices are truncated to zero length first.
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
// Remember where this row's labels start inside the shared labels slice.
labelsLen := len(labels)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: r.Metric,
|
||||
})
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
// The series references sub-slices of the shared buffers:
// the labels appended for this row and the single sample just appended.
tssDst = append(tssDst, prompbmarshal.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
// Store the (possibly re-allocated) slices back into ctx before pushing.
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
remotewrite.Push(&ctx.WriteRequest)
|
||||
// Account for the number of rows handled by this call.
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return nil
|
||||
}
|
||||
@@ -1,10 +1,14 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -13,12 +17,13 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
sendTimeout = flag.Duration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArray("remoteWrite.proxyURL", "Optional proxy URL for writing data to -remoteWrite.url. Supported proxies: http, https, socks5. "+
|
||||
"Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArray("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url. "+
|
||||
@@ -41,11 +46,9 @@ var (
|
||||
type client struct {
|
||||
urlLabelValue string
|
||||
remoteWriteURL string
|
||||
host string
|
||||
requestURI string
|
||||
authHeader string
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *fasthttp.HostClient
|
||||
hc *http.Client
|
||||
|
||||
requestDuration *metrics.Histogram
|
||||
requestsOKCount *metrics.Counter
|
||||
@@ -57,6 +60,30 @@ type client struct {
|
||||
}
|
||||
|
||||
func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
tlsCfg, err := getTLSConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
Dial: statDial,
|
||||
TLSClientConfig: tlsCfg,
|
||||
TLSHandshakeTimeout: 5 * time.Second,
|
||||
MaxConnsPerHost: 2 * concurrency,
|
||||
MaxIdleConnsPerHost: 2 * concurrency,
|
||||
IdleConnTimeout: time.Minute,
|
||||
WriteBufferSize: 64 * 1024,
|
||||
}
|
||||
pURL := proxyURL.GetOptionalArg(argIdx)
|
||||
if len(pURL) > 0 {
|
||||
if !strings.Contains(pURL, "://") {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: it must start with `http://`, `https://` or `socks5://`", pURL)
|
||||
}
|
||||
urlProxy, err := url.Parse(pURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: %s", pURL, err)
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(urlProxy)
|
||||
}
|
||||
authHeader := ""
|
||||
username := basicAuthUsername.GetOptionalArg(argIdx)
|
||||
password := basicAuthPassword.GetOptionalArg(argIdx)
|
||||
@@ -73,63 +100,16 @@ func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentq
|
||||
}
|
||||
authHeader = "Bearer " + token
|
||||
}
|
||||
|
||||
readTimeout := *sendTimeout
|
||||
if readTimeout <= 0 {
|
||||
readTimeout = time.Minute
|
||||
}
|
||||
writeTimeout := readTimeout
|
||||
var u fasthttp.URI
|
||||
u.Update(remoteWriteURL)
|
||||
scheme := string(u.Scheme())
|
||||
switch scheme {
|
||||
case "http", "https":
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme in -remoteWrite.url=%q: %q. It must be http or https", remoteWriteURL, scheme)
|
||||
}
|
||||
host := string(u.Host())
|
||||
if len(host) == 0 {
|
||||
logger.Fatalf("invalid -remoteWrite.url=%q: host cannot be empty. Make sure the url looks like `http://host:port/path`", remoteWriteURL)
|
||||
}
|
||||
requestURI := string(u.RequestURI())
|
||||
isTLS := scheme == "https"
|
||||
var tlsCfg *tls.Config
|
||||
if isTLS {
|
||||
var err error
|
||||
tlsCfg, err = getTLSConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
|
||||
}
|
||||
}
|
||||
if !strings.Contains(host, ":") {
|
||||
if isTLS {
|
||||
host += ":443"
|
||||
} else {
|
||||
host += ":80"
|
||||
}
|
||||
}
|
||||
maxConns := 2 * concurrency
|
||||
hc := &fasthttp.HostClient{
|
||||
Addr: host,
|
||||
Name: "vmagent",
|
||||
Dial: statDial,
|
||||
IsTLS: isTLS,
|
||||
TLSConfig: tlsCfg,
|
||||
MaxConns: maxConns,
|
||||
MaxIdleConnDuration: 10 * readTimeout,
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
MaxResponseBodySize: 1024 * 1024,
|
||||
}
|
||||
c := &client{
|
||||
urlLabelValue: urlLabelValue,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
host: host,
|
||||
requestURI: requestURI,
|
||||
authHeader: authHeader,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
stopCh: make(chan struct{}),
|
||||
hc: &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: *sendTimeout,
|
||||
},
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.urlLabelValue))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.urlLabelValue))
|
||||
@@ -153,16 +133,19 @@ func (c *client) MustStop() {
|
||||
}
|
||||
|
||||
func getTLSConfig(argIdx int) (*tls.Config, error) {
|
||||
tlsConfig := &promauth.TLSConfig{
|
||||
c := &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(argIdx),
|
||||
CertFile: tlsCertFile.GetOptionalArg(argIdx),
|
||||
KeyFile: tlsKeyFile.GetOptionalArg(argIdx),
|
||||
ServerName: tlsServerName.GetOptionalArg(argIdx),
|
||||
InsecureSkipVerify: *tlsInsecureSkipVerify,
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", tlsConfig)
|
||||
if c.CAFile == "" && c.CertFile == "" && c.KeyFile == "" && c.ServerName == "" && !c.InsecureSkipVerify {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", c)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %s", err)
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %w", err)
|
||||
}
|
||||
tlsCfg := cfg.NewTLSConfig()
|
||||
return tlsCfg, nil
|
||||
@@ -201,32 +184,24 @@ func (c *client) runWorker() {
|
||||
}
|
||||
|
||||
func (c *client) sendBlock(block []byte) {
|
||||
req := fasthttp.AcquireRequest()
|
||||
req.SetRequestURI(c.requestURI)
|
||||
req.SetHost(c.host)
|
||||
req.Header.SetMethod("POST")
|
||||
req.Header.Add("Content-Type", "application/x-protobuf")
|
||||
req.Header.Add("Content-Encoding", "snappy")
|
||||
req.Header.Add("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
retryDuration := time.Second
|
||||
|
||||
again:
|
||||
req, err := http.NewRequest("POST", c.remoteWriteURL, bytes.NewBuffer(block))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexected error from http.NewRequest(%q): %s", c.remoteWriteURL, err)
|
||||
}
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vmagent")
|
||||
h.Set("Content-Type", "application/x-protobuf")
|
||||
h.Set("Content-Encoding", "snappy")
|
||||
h.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
if c.authHeader != "" {
|
||||
req.Header.Set("Authorization", c.authHeader)
|
||||
}
|
||||
req.SetBody(block)
|
||||
|
||||
retryDuration := time.Second
|
||||
resp := fasthttp.AcquireResponse()
|
||||
|
||||
again:
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
fasthttp.ReleaseRequest(req)
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
err := doRequestWithPossibleRetry(c.hc, req, resp)
|
||||
resp, err := c.hc.Do(req)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
@@ -236,39 +211,44 @@ again:
|
||||
}
|
||||
logger.Errorf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.remoteWriteURL, err, retryDuration.Seconds())
|
||||
time.Sleep(retryDuration)
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
statusCode := resp.StatusCode()
|
||||
if statusCode/100 != 2 {
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.urlLabelValue, statusCode)).Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
t := time.NewTimer(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
t.Stop()
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q: %d; response body=%q; re-sending the block in %.3f seconds",
|
||||
len(block), c.remoteWriteURL, statusCode, resp.Body(), retryDuration.Seconds())
|
||||
time.Sleep(retryDuration)
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
c.requestsOKCount.Inc()
|
||||
|
||||
// The block has been successfully sent to the remote storage.
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
fasthttp.ReleaseRequest(req)
|
||||
}
|
||||
|
||||
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
|
||||
// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
|
||||
err := hc.Do(req, resp)
|
||||
if err == nil {
|
||||
return nil
|
||||
statusCode := resp.StatusCode
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
return
|
||||
}
|
||||
if err != fasthttp.ErrConnectionClosed {
|
||||
return err
|
||||
|
||||
// Unexpected status code returned
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.urlLabelValue, statusCode)).Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > time.Minute {
|
||||
retryDuration = time.Minute
|
||||
}
|
||||
// Retry request if the server closed the keep-alive connection during the first attempt.
|
||||
return hc.Do(req, resp)
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
logger.Errorf("cannot read response body from %q: %s", c.remoteWriteURL, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q: %d; response body=%q; re-sending the block in %.3f seconds",
|
||||
len(block), c.remoteWriteURL, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
t := time.NewTimer(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
t.Stop()
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ var (
|
||||
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple flags to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config for details")
|
||||
"before sending them to -remoteWrite.url. See https://victoriametrics.github.io/vmagent.html#relabeling for details")
|
||||
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
rcs.global = global
|
||||
}
|
||||
@@ -43,9 +43,13 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
rcs.perURL = make([][]promrelabel.ParsedRelabelConfig, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
// Skip empty relabel config.
|
||||
continue
|
||||
}
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
}
|
||||
@@ -59,7 +63,6 @@ type relabelConfigs struct {
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
func initLabelsGlobal() {
|
||||
// Init labelsGlobal
|
||||
labelsGlobal = nil
|
||||
for _, s := range *unparsedLabelsGlobal {
|
||||
n := strings.IndexByte(s, '=')
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -25,11 +26,14 @@ var (
|
||||
queues = flag.Int("remoteWrite.queues", 1, "The number of concurrent queues to each -remoteWrite.url. Set more queues if a single queue "+
|
||||
"isn't enough for sending high volume of collected data to remote storage")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensistive auth info")
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flag.Int("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
decimalPlaces = flag.Int("remoteWrite.decimalPlaces", 0, "The number of significant decimal places to leave in metric values before writing them to remote storage. "+
|
||||
"See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant decimal places. "+
|
||||
"This option may be used for increasing on-disk compression level for the stored metrics")
|
||||
)
|
||||
|
||||
var rwctxs []*remoteWriteCtx
|
||||
@@ -118,8 +122,19 @@ func Stop() {
|
||||
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
//
|
||||
// Note that wr may be modified by Push due to relabeling.
|
||||
// Note that wr may be modified by Push due to relabeling and rounding.
|
||||
func Push(wr *prompbmarshal.WriteRequest) {
|
||||
if *decimalPlaces > 0 {
|
||||
// Round values according to decimalPlaces
|
||||
for i := range wr.Timeseries {
|
||||
samples := wr.Timeseries[i].Samples
|
||||
for j := range samples {
|
||||
s := &samples[j]
|
||||
s.Value = decimal.Round(s.Value, *decimalPlaces)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var rctx *relabelCtx
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
prcsGlobal := rcs.global
|
||||
@@ -128,7 +143,7 @@ func Push(wr *prompbmarshal.WriteRequest) {
|
||||
}
|
||||
tss := wr.Timeseries
|
||||
for len(tss) > 0 {
|
||||
// Process big tss in smaller blocks in order to reduce maxmimum memory usage
|
||||
// Process big tss in smaller blocks in order to reduce the maximum memory usage
|
||||
tssBlock := tss
|
||||
if len(tssBlock) > maxRowsPerBlock {
|
||||
tssBlock = tss[:maxRowsPerBlock]
|
||||
@@ -162,8 +177,6 @@ type remoteWriteCtx struct {
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
relabelMetricsDropped *metrics.Counter
|
||||
}
|
||||
|
||||
@@ -208,15 +221,17 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
prcs := rcs.perURL[rwctx.idx]
|
||||
if len(prcs) > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467 for details.
|
||||
rwctx.tss = append(rwctx.tss[:0], tss...)
|
||||
tss = rwctx.tss
|
||||
rctx = getRelabelCtx()
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
v = tssRelabelPool.Get().(*[]prompbmarshal.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
tssLen := len(tss)
|
||||
tss = rctx.applyRelabeling(tss, nil, prcs)
|
||||
rwctx.relabelMetricsDropped.Add(tssLen - len(tss))
|
||||
@@ -225,8 +240,15 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
// Zero rwctx.tss in order to free up GC references.
|
||||
rwctx.tss = prompbmarshal.ResetTimeSeries(rwctx.tss)
|
||||
}
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
return &a
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
@@ -9,7 +11,10 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func statDial(addr string) (conn net.Conn, err error) {
|
||||
func statDial(network, addr string) (conn net.Conn, err error) {
|
||||
if !strings.HasPrefix(network, "tcp") {
|
||||
return nil, fmt.Errorf("unexpected network passed to statDial: %q; it must start from `tcp`", network)
|
||||
}
|
||||
if netutil.TCP6Enabled() {
|
||||
conn, err = fasthttp.DialDualStack(addr)
|
||||
} else {
|
||||
|
||||
@@ -61,24 +61,30 @@ run-vmalert: vmalert
|
||||
./bin/vmalert -rule=app/vmalert/config/testdata/rules2-good.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093 \
|
||||
-notifier.url=http://127.0.0.1:9093 \
|
||||
-remoteWrite.url=http://localhost:8428 \
|
||||
-remoteRead.url=http://localhost:8428 \
|
||||
-external.label=cluster=east-1 \
|
||||
-external.label=replica=a \
|
||||
-evaluationInterval=3s
|
||||
|
||||
vmalert-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-amd64 ./app/vmalert
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm64 ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-ppc64le ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-386 ./app/vmalert
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmalert-local-with-goarch
|
||||
|
||||
vmalert-local-with-goarch:
|
||||
APP_NAME=vmalert $(MAKE) app-local-with-goarch
|
||||
|
||||
vmalert-pure:
|
||||
APP_NAME=vmalert $(MAKE) app-local-pure
|
||||
|
||||
@@ -44,10 +44,19 @@ compatible storage address for storing recording rules results and alerts state
|
||||
Then configure `vmalert` accordingly:
|
||||
```
|
||||
./bin/vmalert -rule=alert.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093
|
||||
-datasource.url=http://localhost:8428 \ # PromQL compatible datasource
|
||||
-notifier.url=http://localhost:9093 \ # AlertManager URL
|
||||
-notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL
|
||||
-remoteWrite.url=http://localhost:8428 \ # remote write compatible storage to persist rules
|
||||
-remoteRead.url=http://localhost:8428 \ # PromQL compatible datasource to restore alerts state from
|
||||
-external.label=cluster=east-1 \ # External label to be applied for each rule
|
||||
-external.label=replica=a \ # Multiple external labels may be set
|
||||
-evaluationInterval=3s # Default evaluation interval if not specified in rules group
|
||||
```
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
@@ -153,94 +162,154 @@ Used as alert source in AlertManager.
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
```
|
||||
Usage of vmalert:
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password for -datasource.url
|
||||
Optional basic auth password for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used.
|
||||
-datasource.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url.
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used
|
||||
-datasource.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url
|
||||
-datasource.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url.
|
||||
-datasource.tlsServerName value
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used.
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url
|
||||
-datasource.tlsServerName string
|
||||
Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP is used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-evaluationInterval duration
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
-external.alert.source string
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
e.g. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'. If empty '/api/v1/:groupID/alertID/status' is used
|
||||
-external.label array
|
||||
Optional label in the form 'name=value' to add to all generated recording rules and alerts. Pass multiple -external.label flags in order to add multiple label sets.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-external.url string
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for graceful shutdown of HTTP server. Highly loaded server may require increased value for graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
Address to listen for http connections (default ":8880")
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-memory.allowedBytes int
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-notifier.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used.
|
||||
-notifier.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url.
|
||||
Auth key for /metrics. It overrides httpAuth settings
|
||||
-notifier.basicAuth.password array
|
||||
Optional basic auth password for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.basicAuth.username array
|
||||
Optional basic auth username for -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -notifier.url
|
||||
-notifier.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url.
|
||||
-notifier.tlsServerName value
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used.
|
||||
-notifier.url string
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
Whether to skip tls verification when connecting to -notifier.url
|
||||
-notifier.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to -notifier.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.tlsServerName array
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-notifier.url array
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof. It overrides httpAuth settings
|
||||
-remoteRead.basicAuth.password string
|
||||
Optional basic auth password for -remoteRead.url
|
||||
Optional basic auth password for -remoteRead.url
|
||||
-remoteRead.basicAuth.username string
|
||||
Optional basic auth username for -remoteRead.url
|
||||
Optional basic auth username for -remoteRead.url
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used.
|
||||
-remoteRead.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url.
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteRead.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used
|
||||
-remoteRead.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -remoteRead.url
|
||||
-remoteRead.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url.
|
||||
-remoteRead.tlsServerName value
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used.
|
||||
Whether to skip tls verification when connecting to -remoteRead.url
|
||||
-remoteRead.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url
|
||||
-remoteRead.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used
|
||||
-remoteRead.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remoteWrite.basicAuth.password string
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
-remoteWrite.basicAuth.username string
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
-remoteWrite.concurrency int
|
||||
Defines number of readers that concurrently write into remote storage (default 1)
|
||||
Defines number of writers for concurrent writing into remote querier (default 1)
|
||||
-remoteWrite.flushInterval duration
|
||||
Defines interval of flushes to remote write endpoint (default 5s)
|
||||
-remoteWrite.maxBatchSize int
|
||||
Defines defines max number of timeseries to be flushed at once (default 1000)
|
||||
Defines max number of timeseries to be flushed at once (default 1000)
|
||||
-remoteWrite.maxQueueSize int
|
||||
Defines the max number of pending datapoints to remote write endpoint (default 100000)
|
||||
-remoteWrite.tlsCAFile value
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used.
|
||||
-remoteWrite.tlsCertFile value
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url.
|
||||
Defines the max number of pending datapoints to remote write endpoint (default 100000)
|
||||
-remoteWrite.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used
|
||||
-remoteWrite.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsKeyFile value
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url.
|
||||
-remoteWrite.tlsServerName value
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used.
|
||||
Whether to skip tls verification when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url
|
||||
-remoteWrite.tlsServerName string
|
||||
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
|
||||
-remoteWrite.url string
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule value
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule array
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-rule.validateExpressions
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
-rule.validateTemplates
|
||||
Whether to validate annotation and label templates (default true)
|
||||
Whether to validate annotation and label templates (default true)
|
||||
-tls
|
||||
Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key. Used only if -tls is set
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
@@ -273,3 +342,20 @@ It is recommended using
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-prod` from the root folder of the repository.
|
||||
It builds `vmalert-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
|
||||
#### ARM build
|
||||
|
||||
ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/).
|
||||
|
||||
#### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert-arm` or `make vmalert-arm64` from the root folder of the repository.
|
||||
It builds `vmalert-arm` or `vmalert-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
#### Production ARM build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-arm-prod` or `make vmalert-arm64-prod` from the root folder of the repository.
|
||||
It builds `vmalert-arm-prod` or `vmalert-arm64-prod` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// AlertingRule is basic alert entity
|
||||
@@ -36,19 +37,71 @@ type AlertingRule struct {
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
|
||||
metrics *alertingRuleMetrics
|
||||
}
|
||||
|
||||
func newAlertingRule(gID uint64, cfg config.Rule) *AlertingRule {
|
||||
return &AlertingRule{
|
||||
type alertingRuleMetrics struct {
|
||||
errors *gauge
|
||||
pending *gauge
|
||||
active *gauge
|
||||
}
|
||||
|
||||
func newAlertingRule(group *Group, cfg config.Rule) *AlertingRule {
|
||||
ar := &AlertingRule{
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Alert,
|
||||
Expr: cfg.Expr,
|
||||
For: cfg.For,
|
||||
Labels: cfg.Labels,
|
||||
Annotations: cfg.Annotations,
|
||||
GroupID: gID,
|
||||
GroupID: group.ID(),
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StatePending {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.active = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_firing{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateFiring {
|
||||
num++
|
||||
}
|
||||
}
|
||||
return float64(num)
|
||||
})
|
||||
ar.metrics.errors = getOrCreateGauge(fmt.Sprintf(`vmalert_alerts_error{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
if ar.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
return ar
|
||||
}
|
||||
|
||||
// Close unregisters rule metrics
|
||||
func (ar *AlertingRule) Close() {
|
||||
metrics.UnregisterMetric(ar.metrics.active.name)
|
||||
metrics.UnregisterMetric(ar.metrics.pending.name)
|
||||
metrics.UnregisterMetric(ar.metrics.errors.name)
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
@@ -72,7 +125,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||
ar.lastExecError = err
|
||||
ar.lastExecTime = time.Now()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %s", ar.Expr, err)
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
for h, a := range ar.alerts {
|
||||
@@ -103,7 +156,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||
a, err := ar.newAlert(m, ar.lastExecTime)
|
||||
if err != nil {
|
||||
ar.lastExecError = err
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
@@ -331,15 +384,22 @@ func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) p
|
||||
// Restore restores only Start field. Field State will be always Pending and supposed
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
// Only rules with For > 0 will be restored.
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("querier is nil")
|
||||
}
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
for k, v := range labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
|
||||
// Get the last datapoint in range via MetricsQL `last_over_time`.
|
||||
// We don't use plain PromQL since Prometheus doesn't support
|
||||
// remote write protocol which is used for state persistence in vmalert.
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q}[%ds])",
|
||||
alertForStateMetricName, ar.Name, int(lookback.Seconds()))
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, err := q.Query(ctx, expr)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -363,7 +423,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
|
||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create alert: %s", err)
|
||||
return fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
a.ID = hash(m)
|
||||
a.State = notifier.StatePending
|
||||
|
||||
@@ -419,7 +419,7 @@ func TestAlertingRule_Restore(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.add(tc.metrics...)
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour); err != nil {
|
||||
if err := tc.rule.Restore(context.TODO(), fq, time.Hour, nil); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
@@ -46,19 +47,19 @@ func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
|
||||
}
|
||||
uniqueRules[r.ID] = struct{}{}
|
||||
if err := r.Validate(); err != nil {
|
||||
return fmt.Errorf("invalid rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if validateExpressions {
|
||||
if _, err := metricsql.Parse(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateAnnotations {
|
||||
if err := notifier.ValidateTemplates(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
if err := notifier.ValidateTemplates(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %s", g.Name, ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -90,6 +91,14 @@ func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Name returns Rule name according to its type
|
||||
func (r *Rule) Name() string {
|
||||
if r.Record != "" {
|
||||
return r.Record
|
||||
}
|
||||
return r.Alert
|
||||
}
|
||||
|
||||
// HashRule hashes significant Rule fields into
|
||||
// unique hash value
|
||||
func HashRule(r Rule) uint64 {
|
||||
@@ -137,7 +146,7 @@ func Parse(pathPatterns []string, validateAnnotations, validateExpressions bool)
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %v", pattern, err)
|
||||
return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
}
|
||||
@@ -150,7 +159,7 @@ func Parse(pathPatterns []string, validateAnnotations, validateExpressions bool)
|
||||
}
|
||||
for _, g := range gr {
|
||||
if err := g.Validate(validateAnnotations, validateExpressions); err != nil {
|
||||
return nil, fmt.Errorf("invalid group %q in file %q: %s", g.Name, file, err)
|
||||
return nil, fmt.Errorf("invalid group %q in file %q: %w", g.Name, file, err)
|
||||
}
|
||||
if _, ok := uniqueGroups[g.Name]; ok {
|
||||
return nil, fmt.Errorf("group name %q duplicate in file %q", g.Name, file)
|
||||
@@ -171,6 +180,7 @@ func parseFile(path string) ([]Group, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file: %w", err)
|
||||
}
|
||||
data = envtemplate.Replace(data)
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
|
||||
@@ -70,13 +70,13 @@ func TestParseBad(t *testing.T) {
|
||||
|
||||
func TestRule_Validate(t *testing.T) {
|
||||
if err := (&Rule{}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty name error")
|
||||
t.Errorf("expected empty name error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty expr error")
|
||||
t.Errorf("expected empty expr error")
|
||||
}
|
||||
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Errorf("exptected valid rule; got %s", err)
|
||||
t.Errorf("expected valid rule; got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
38
app/vmalert/datasource/init.go
Normal file
38
app/vmalert/datasource/init.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
|
||||
tlsCertFile = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
|
||||
tlsKeyFile = flag.String("datasource.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -datasource.url")
|
||||
tlsCAFile = flag.String("datasource.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -datasource.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("datasource.tlsServerName", "", "Optional TLS server name to use for connections to -datasource.url. "+
|
||||
"By default the server name from -datasource.url is used")
|
||||
)
|
||||
|
||||
// Init creates a Querier from provided flag values.
|
||||
func Init() (Querier, error) {
|
||||
if *addr == "" {
|
||||
flag.PrintDefaults()
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
c := &http.Client{Transport: tr}
|
||||
return NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, c), nil
|
||||
}
|
||||
@@ -32,7 +32,7 @@ func (r response) metrics() ([]Metric, error) {
|
||||
for i, res := range r.Data.Result {
|
||||
f, err = strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %s", res, res.TV[1], err)
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
m.Labels = nil
|
||||
for k, v := range r.Data.Result[i].Labels {
|
||||
@@ -49,9 +49,10 @@ const queryPath = "/api/v1/query?query="
|
||||
|
||||
// VMStorage represents vmstorage entity with ability to read and write metrics
|
||||
type VMStorage struct {
|
||||
c *http.Client
|
||||
queryURL string
|
||||
basicAuthUser, basicAuthPass string
|
||||
c *http.Client
|
||||
queryURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
}
|
||||
|
||||
// NewVMStorage is a constructor for VMStorage
|
||||
@@ -79,25 +80,25 @@ func (s *VMStorage) Query(ctx context.Context, query string) ([]Metric, error) {
|
||||
}
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s:%s", req.URL, err)
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", req.URL, err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("datasource returns unxeprected response code %d for %s with err %s. Reponse body %s", resp.StatusCode, req.URL, err, body)
|
||||
return nil, fmt.Errorf("datasource returns unexpected response code %d for %s with err %w. Reponse body %s", resp.StatusCode, req.URL, err, body)
|
||||
}
|
||||
r := &response{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("error parsing metrics for %s:%s", req.URL, err)
|
||||
return nil, fmt.Errorf("error parsing metrics for %s: %w", req.URL, err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL, r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return nil, fmt.Errorf("unkown status:%s, Expected success or error ", r.Status)
|
||||
return nil, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
}
|
||||
if r.Data.ResultType != rtVector {
|
||||
return nil, fmt.Errorf("unkown restul type:%s. Expected vector", r.Data.ResultType)
|
||||
return nil, fmt.Errorf("unknown restul type:%s. Expected vector", r.Data.ResultType)
|
||||
}
|
||||
return r.metrics()
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
t.Errorf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
|
||||
}
|
||||
if r.URL.Query().Get("query") != query {
|
||||
t.Errorf("exptected %s in query param, got %s", query, r.URL.Query().Get("query"))
|
||||
t.Errorf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
@@ -76,7 +76,7 @@ func TestVMSelectQuery(t *testing.T) {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("exptected 1 metric got %d in %+v", len(m), m)
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := Metric{
|
||||
Labels: []Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -29,9 +30,24 @@ type Group struct {
|
||||
// channel accepts new Group obj
|
||||
// which supposed to update current group
|
||||
updateCh chan *Group
|
||||
|
||||
metrics *groupMetrics
|
||||
}
|
||||
|
||||
func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
type groupMetrics struct {
|
||||
iterationTotal *counter
|
||||
iterationDuration *summary
|
||||
}
|
||||
|
||||
func newGroupMetrics(name, file string) *groupMetrics {
|
||||
m := &groupMetrics{}
|
||||
labels := fmt.Sprintf(`group=%q, file=%q`, name, file)
|
||||
m.iterationTotal = getOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
|
||||
m.iterationDuration = getOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
|
||||
return m
|
||||
}
|
||||
|
||||
func newGroup(cfg config.Group, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||
g := &Group{
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
@@ -41,6 +57,7 @@ func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
finishedCh: make(chan struct{}),
|
||||
updateCh: make(chan *Group),
|
||||
}
|
||||
g.metrics = newGroupMetrics(g.Name, g.File)
|
||||
if g.Interval == 0 {
|
||||
g.Interval = defaultInterval
|
||||
}
|
||||
@@ -49,6 +66,17 @@ func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
}
|
||||
rules := make([]Rule, len(cfg.Rules))
|
||||
for i, r := range cfg.Rules {
|
||||
// override rule labels with external labels
|
||||
for k, v := range labels {
|
||||
if prevV, ok := r.Labels[k]; ok {
|
||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||
k, prevV, g.Name, r.Name(), k, v)
|
||||
}
|
||||
if r.Labels == nil {
|
||||
r.Labels = map[string]string{}
|
||||
}
|
||||
r.Labels[k] = v
|
||||
}
|
||||
rules[i] = g.newRule(r)
|
||||
}
|
||||
g.Rules = rules
|
||||
@@ -57,9 +85,9 @@ func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
|
||||
|
||||
func (g *Group) newRule(rule config.Rule) Rule {
|
||||
if rule.Alert != "" {
|
||||
return newAlertingRule(g.ID(), rule)
|
||||
return newAlertingRule(g, rule)
|
||||
}
|
||||
return newRecordingRule(g.ID(), rule)
|
||||
return newRecordingRule(g, rule)
|
||||
}
|
||||
|
||||
// ID return unique group ID that consists of
|
||||
@@ -73,7 +101,7 @@ func (g *Group) ID() uint64 {
|
||||
}
|
||||
|
||||
// Restore restores alerts state for group rules
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration, labels map[string]string) error {
|
||||
for _, rule := range g.Rules {
|
||||
rr, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
@@ -82,8 +110,8 @@ func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time
|
||||
if rr.For < 1 {
|
||||
continue
|
||||
}
|
||||
if err := rr.Restore(ctx, q, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %s", rule, err)
|
||||
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -105,6 +133,7 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
if !ok {
|
||||
// old rule is not present in the new list
|
||||
// so we mark it for removing
|
||||
g.Rules[i].Close()
|
||||
g.Rules[i] = nil
|
||||
continue
|
||||
}
|
||||
@@ -132,19 +161,9 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||
}
|
||||
|
||||
var (
|
||||
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
|
||||
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
|
||||
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
|
||||
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
|
||||
|
||||
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (g *Group) close() {
|
||||
@@ -153,12 +172,18 @@ func (g *Group) close() {
|
||||
}
|
||||
close(g.doneCh)
|
||||
<-g.finishedCh
|
||||
|
||||
metrics.UnregisterMetric(g.metrics.iterationDuration.name)
|
||||
metrics.UnregisterMetric(g.metrics.iterationTotal.name)
|
||||
for _, rule := range g.Rules {
|
||||
rule.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifier.Notifier, rw *remotewrite.Client) {
|
||||
func (g *Group) start(ctx context.Context, querier datasource.Querier, nts []notifier.Notifier, rw *remotewrite.Client) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
e := &executor{querier, nr, rw}
|
||||
e := &executor{querier, nts, rw}
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
@@ -185,7 +210,7 @@ func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifi
|
||||
g.mu.Unlock()
|
||||
logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
|
||||
case <-t.C:
|
||||
iterationTotal.Inc()
|
||||
g.metrics.iterationTotal.Inc()
|
||||
iterationStart := time.Now()
|
||||
|
||||
errs := e.execConcurrently(ctx, g.Rules, g.Concurrency, g.Interval)
|
||||
@@ -195,15 +220,15 @@ func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifi
|
||||
}
|
||||
}
|
||||
|
||||
iterationDuration.UpdateDuration(iterationStart)
|
||||
g.metrics.iterationDuration.UpdateDuration(iterationStart)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type executor struct {
|
||||
querier datasource.Querier
|
||||
notifier notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
querier datasource.Querier
|
||||
notifiers []notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
}
|
||||
|
||||
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurrency int, interval time.Duration) chan error {
|
||||
@@ -240,6 +265,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurren
|
||||
return res
|
||||
}
|
||||
|
||||
var (
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, interval time.Duration) error {
|
||||
execTotal.Inc()
|
||||
execStart := time.Now()
|
||||
@@ -250,15 +283,14 @@ func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, inter
|
||||
tss, err := rule.Exec(ctx, e.querier, returnSeries)
|
||||
if err != nil {
|
||||
execErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to execute: %s", rule, err)
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
||||
}
|
||||
|
||||
if len(tss) > 0 && e.rw != nil {
|
||||
remoteWriteSent.Add(len(tss))
|
||||
for _, ts := range tss {
|
||||
if err := e.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
return fmt.Errorf("rule %q: remote write failure: %s", rule, err)
|
||||
return fmt.Errorf("rule %q: remote write failure: %w", rule, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -286,10 +318,14 @@ func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, inter
|
||||
if len(alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
alertsSent.Add(len(alerts))
|
||||
if err := e.notifier.Send(ctx, alerts); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
return fmt.Errorf("rule %q: failed to send alerts: %s", rule, err)
|
||||
errGr := new(utils.ErrGroup)
|
||||
for _, nt := range e.notifiers {
|
||||
if err := nt.Send(ctx, alerts); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts: %w", rule, err))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return errGr.Err()
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ func TestGroupStart(t *testing.T) {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
const evalInterval = time.Millisecond
|
||||
g := newGroup(groups[0], evalInterval)
|
||||
g := newGroup(groups[0], evalInterval, map[string]string{"cluster": "east-1"})
|
||||
g.Concurrency = 2
|
||||
|
||||
fn := &fakeNotifier{}
|
||||
@@ -179,7 +179,7 @@ func TestGroupStart(t *testing.T) {
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.start(context.Background(), fs, fn, nil)
|
||||
g.start(context.Background(), fs, []notifier.Notifier{fn}, nil)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
|
||||
@@ -2,12 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
@@ -16,15 +12,16 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -34,67 +31,22 @@ Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.`)
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
|
||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
|
||||
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
datasourceTLSInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
|
||||
datasourceTLSCertFile = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
|
||||
datasourceTLSKeyFile = flag.String("datasource.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -datasource.url")
|
||||
datasourceTLSCAFile = flag.String("datasource.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -datasource.url. "+
|
||||
"By default system CA is used")
|
||||
datasourceTLSServerName = flag.String("datasource.tlsServerName", "", "Optional TLS server name to use for connections to -datasource.url. "+
|
||||
"By default the server name from -datasource.url is used")
|
||||
|
||||
remoteWriteURL = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
" and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
|
||||
remoteWriteUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
remoteWritePassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
remoteWriteMaxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
remoteWriteMaxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
|
||||
remoteWriteConcurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote storage")
|
||||
remoteWriteTLSInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
|
||||
remoteWriteTLSCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
remoteWriteTLSServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
|
||||
remoteReadURL = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
remoteReadUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
remoteReadPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
remoteReadTLSInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
|
||||
remoteReadTLSCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
|
||||
remoteReadTLSKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
|
||||
remoteReadTLSCAFile = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
|
||||
"By default system CA is used")
|
||||
remoteReadTLSServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
|
||||
"By default the server name from -remoteRead.url is used")
|
||||
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
|
||||
notifierURL = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
notifierTLSInsecureSkipVerify = flag.Bool("notifier.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -notifier.url")
|
||||
notifierTLSCertFile = flag.String("notifier.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
notifierTLSKeyFile = flag.String("notifier.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
notifierTLSCAFile = flag.String("notifier.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
notifierTLSServerName = flag.String("notifier.tlsServerName", "", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
|
||||
externalLabels = flagutil.NewArray("external.label", "Optional label in the form 'name=value' to add to all generated recording rules and alerts. "+
|
||||
"Pass multiple -label flags in order to add multiple label sets.")
|
||||
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -104,64 +56,13 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
checkFlags()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
manager, err := newManager(ctx)
|
||||
if err != nil {
|
||||
logger.Fatalf("can not get external url: %s ", err)
|
||||
logger.Fatalf("failed to init: %s", err)
|
||||
}
|
||||
notifier.InitTemplateFunc(eu)
|
||||
aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
if err != nil {
|
||||
logger.Fatalf("URL generator error: %s", err)
|
||||
}
|
||||
|
||||
dst, err := getTransport(datasourceURL, datasourceTLSCertFile, datasourceTLSKeyFile, datasourceTLSCAFile, datasourceTLSServerName, datasourceTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create datasource transport: %s", err)
|
||||
}
|
||||
|
||||
nt, err := getTransport(notifierURL, notifierTLSCertFile, notifierTLSKeyFile, notifierTLSCAFile, notifierTLSServerName, notifierTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create notifier transport: %s", err)
|
||||
}
|
||||
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{Transport: dst}),
|
||||
notifier: notifier.NewAlertManager(*notifierURL, aug, &http.Client{Transport: nt}),
|
||||
}
|
||||
if *remoteWriteURL != "" {
|
||||
t, err := getTransport(remoteWriteURL, remoteWriteTLSCertFile, remoteWriteTLSKeyFile, remoteWriteTLSCAFile, remoteWriteTLSServerName, remoteWriteTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create remoteWrite transport: %s", err)
|
||||
}
|
||||
|
||||
c, err := remotewrite.NewClient(ctx, remotewrite.Config{
|
||||
Addr: *remoteWriteURL,
|
||||
Concurrency: *remoteWriteConcurrency,
|
||||
MaxQueueSize: *remoteWriteMaxQueueSize,
|
||||
MaxBatchSize: *remoteWriteMaxBatchSize,
|
||||
FlushInterval: *evaluationInterval,
|
||||
BasicAuthUser: *remoteWriteUsername,
|
||||
BasicAuthPass: *remoteWritePassword,
|
||||
Transport: t,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init remotewrite client: %s", err)
|
||||
}
|
||||
manager.rw = c
|
||||
}
|
||||
|
||||
if *remoteReadURL != "" {
|
||||
t, err := getTransport(remoteReadURL, remoteReadTLSCertFile, remoteReadTLSKeyFile, remoteReadTLSCAFile, remoteReadTLSServerName, remoteReadTLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create remoteRead transport: %s", err)
|
||||
}
|
||||
|
||||
manager.rr = datasource.NewVMStorage(*remoteReadURL, *remoteReadUsername, *remoteReadPassword, &http.Client{Transport: t})
|
||||
}
|
||||
|
||||
if err := manager.start(ctx, *rulePath, *validateTemplates, *validateExpressions); err != nil {
|
||||
logger.Fatalf("failed to start: %s", err)
|
||||
}
|
||||
@@ -206,6 +107,53 @@ var (
|
||||
configTimestamp = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
func newManager(ctx context.Context) (*manager, error) {
|
||||
q, err := datasource.Init()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init datasource: %w", err)
|
||||
}
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init `external.url`: %w", err)
|
||||
}
|
||||
notifier.InitTemplateFunc(eu)
|
||||
aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init `external.alert.source`: %w", err)
|
||||
}
|
||||
nts, err := notifier.Init(aug)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||
}
|
||||
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
querier: q,
|
||||
notifiers: nts,
|
||||
labels: map[string]string{},
|
||||
}
|
||||
rw, err := remotewrite.Init(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
|
||||
}
|
||||
manager.rw = rw
|
||||
|
||||
rr, err := remoteread.Init()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteRead: %w", err)
|
||||
}
|
||||
manager.rr = rr
|
||||
|
||||
for _, s := range *externalLabels {
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
return nil, fmt.Errorf("missing '=' in `-label`. It must contain label in the form `name=value`; got %q", s)
|
||||
}
|
||||
manager.labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
return manager, nil
|
||||
}
|
||||
|
||||
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
if externalURL != "" {
|
||||
return url.Parse(externalURL)
|
||||
@@ -235,7 +183,7 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
if err := notifier.ValidateTemplates(map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("error validating source template %s:%w", externalAlertSource, err)
|
||||
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
@@ -250,68 +198,6 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getTLSConfig builds a *tls.Config from the given flag values.
//
// If *certFile is non-empty, the client certificate is loaded from
// *certFile and *keyFile. If *CAFile is non-empty, its PEM contents are used
// as the root CA pool; otherwise RootCAs is left nil. *serverName and
// *insecureSkipVerify are copied into the config as-is.
//
// Errors from loading the key pair or reading the CA file are wrapped with
// %w so callers can inspect the underlying cause via errors.Is / errors.As.
func getTLSConfig(certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*tls.Config, error) {
	var certs []tls.Certificate
	if *certFile != "" {
		cert, err := tls.LoadX509KeyPair(*certFile, *keyFile)
		if err != nil {
			return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %w", *certFile, *keyFile, err)
		}

		certs = []tls.Certificate{cert}
	}

	var rootCAs *x509.CertPool
	if *CAFile != "" {
		pem, err := ioutil.ReadFile(*CAFile)
		if err != nil {
			return nil, fmt.Errorf("cannot read `ca_file` %q: %w", *CAFile, err)
		}

		rootCAs = x509.NewCertPool()
		if !rootCAs.AppendCertsFromPEM(pem) {
			return nil, fmt.Errorf("cannot parse data from `ca_file` %q", *CAFile)
		}
	}

	return &tls.Config{
		Certificates:       certs,
		InsecureSkipVerify: *insecureSkipVerify,
		RootCAs:            rootCAs,
		ServerName:         *serverName,
	}, nil
}
|
||||
|
||||
func getTransport(URL, certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*http.Transport, error) {
|
||||
var u fasthttp.URI
|
||||
u.Update(*URL)
|
||||
|
||||
var t *http.Transport
|
||||
if string(u.Scheme()) == "https" {
|
||||
t = http.DefaultTransport.(*http.Transport).Clone()
|
||||
|
||||
tlsCfg, err := getTLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t.TLSClientConfig = tlsCfg
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func checkFlags() {
|
||||
if *notifierURL == "" {
|
||||
flag.PrintDefaults()
|
||||
logger.Fatalf("notifier.url is empty")
|
||||
}
|
||||
if *datasourceURL == "" {
|
||||
flag.PrintDefaults()
|
||||
logger.Fatalf("datasource.url is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmalert processes alerts and recording rules.
|
||||
|
||||
@@ -41,7 +41,7 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
}
|
||||
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
|
||||
if err == nil {
|
||||
t.Errorf("exptected tempalte validation error got nil")
|
||||
t.Errorf("expected tempalte validation error got nil")
|
||||
}
|
||||
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}", true)
|
||||
if err != nil {
|
||||
@@ -51,55 +51,3 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetTLSConfig(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
serverName = "test"
|
||||
insecureSkipVerify = true
|
||||
tlsCfg, err := getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tlsCfg == nil {
|
||||
t.Errorf("expected tlsConfig to be set, got nil")
|
||||
}
|
||||
if tlsCfg.ServerName != serverName {
|
||||
t.Errorf("unexpected ServerName, want %s, got %s", serverName, tlsCfg.ServerName)
|
||||
}
|
||||
if tlsCfg.InsecureSkipVerify != insecureSkipVerify {
|
||||
t.Errorf("unexpected InsecureSkipVerify, want %v, got %v", insecureSkipVerify, tlsCfg.InsecureSkipVerify)
|
||||
}
|
||||
certFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected keypair error, got nil")
|
||||
}
|
||||
certFile = ""
|
||||
CAFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected read error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetTransport(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
URL := "http://victoriametrics.com"
|
||||
tr, err := getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr != nil {
|
||||
t.Errorf("expected Transport to be nil, got %v", tr)
|
||||
}
|
||||
URL = "https://victoriametrics.com"
|
||||
tr, err = getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr.TLSClientConfig == nil {
|
||||
t.Errorf("expected TLSClientConfig to be set, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,13 +15,14 @@ import (
|
||||
|
||||
// manager controls group states
|
||||
type manager struct {
|
||||
storage datasource.Querier
|
||||
notifier notifier.Notifier
|
||||
querier datasource.Querier
|
||||
notifiers []notifier.Notifier
|
||||
|
||||
rw *remotewrite.Client
|
||||
rr datasource.Querier
|
||||
|
||||
wg sync.WaitGroup
|
||||
wg sync.WaitGroup
|
||||
labels map[string]string
|
||||
|
||||
groupsMu sync.RWMutex
|
||||
groups map[uint64]*Group
|
||||
@@ -64,7 +65,7 @@ func (m *manager) close() {
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) {
|
||||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack)
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
|
||||
}
|
||||
@@ -73,7 +74,7 @@ func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) {
|
||||
m.wg.Add(1)
|
||||
id := group.ID()
|
||||
go func() {
|
||||
group.start(ctx, m.storage, m.notifier, m.rw)
|
||||
group.start(ctx, m.querier, m.notifiers, m.rw)
|
||||
m.wg.Done()
|
||||
}()
|
||||
m.groups[id] = group
|
||||
@@ -83,12 +84,12 @@ func (m *manager) update(ctx context.Context, path []string, validateTpl, valida
|
||||
logger.Infof("reading rules configuration file from %q", strings.Join(path, ";"))
|
||||
groupsCfg, err := config.Parse(path, validateTpl, validateExpr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse configuration file: %s", err)
|
||||
return fmt.Errorf("cannot parse configuration file: %w", err)
|
||||
}
|
||||
|
||||
groupsRegistry := make(map[uint64]*Group)
|
||||
for _, cfg := range groupsCfg {
|
||||
ng := newGroup(cfg, *evaluationInterval)
|
||||
ng := newGroup(cfg, *evaluationInterval, m.labels)
|
||||
groupsRegistry[ng.ID()] = ng
|
||||
}
|
||||
|
||||
|
||||
@@ -37,9 +37,9 @@ func TestManagerUpdateError(t *testing.T) {
|
||||
// Should be executed with -race flag
|
||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: &fakeQuerier{},
|
||||
notifier: &fakeNotifier{},
|
||||
groups: make(map[uint64]*Group),
|
||||
querier: &fakeQuerier{},
|
||||
notifiers: []notifier.Notifier{&fakeNotifier{}},
|
||||
}
|
||||
paths := []string{
|
||||
"config/testdata/dir/rules0-good.rules",
|
||||
@@ -184,7 +184,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.TODO())
|
||||
m := &manager{groups: make(map[uint64]*Group), storage: &fakeQuerier{}}
|
||||
m := &manager{groups: make(map[uint64]*Group), querier: &fakeQuerier{}}
|
||||
path := []string{tc.initPath}
|
||||
if err := m.update(ctx, path, true, true, false); err != nil {
|
||||
t.Fatalf("failed to complete initial rules update: %s", err)
|
||||
|
||||
39
app/vmalert/metrics.go
Normal file
39
app/vmalert/metrics.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package main
|
||||
|
||||
import "github.com/VictoriaMetrics/metrics"
|
||||
|
||||
type gauge struct {
|
||||
name string
|
||||
*metrics.Gauge
|
||||
}
|
||||
|
||||
func getOrCreateGauge(name string, f func() float64) *gauge {
|
||||
return &gauge{
|
||||
name: name,
|
||||
Gauge: metrics.GetOrCreateGauge(name, f),
|
||||
}
|
||||
}
|
||||
|
||||
type counter struct {
|
||||
name string
|
||||
*metrics.Counter
|
||||
}
|
||||
|
||||
func getOrCreateCounter(name string) *counter {
|
||||
return &counter{
|
||||
name: name,
|
||||
Counter: metrics.GetOrCreateCounter(name),
|
||||
}
|
||||
}
|
||||
|
||||
type summary struct {
|
||||
name string
|
||||
*metrics.Summary
|
||||
}
|
||||
|
||||
func getOrCreateSummary(name string) *summary {
|
||||
return &summary{
|
||||
name: name,
|
||||
Summary: metrics.GetOrCreateSummary(name),
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
// Alert the triggered alert
|
||||
@@ -77,7 +79,7 @@ func ValidateTemplates(annotations map[string]string) error {
|
||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
||||
var builder strings.Builder
|
||||
var buf bytes.Buffer
|
||||
eg := errGroup{}
|
||||
eg := new(utils.ErrGroup)
|
||||
r := make(map[string]string, len(annotations))
|
||||
for key, text := range annotations {
|
||||
r[key] = text
|
||||
@@ -87,12 +89,12 @@ func templateAnnotations(annotations map[string]string, header string, data aler
|
||||
builder.WriteString(header)
|
||||
builder.WriteString(text)
|
||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
||||
eg.errs = append(eg.errs, fmt.Sprintf("key %q, template %q: %s", key, text, err))
|
||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
}
|
||||
return r, eg.err()
|
||||
return r, eg.Err()
|
||||
}
|
||||
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
||||
|
||||
@@ -12,9 +12,11 @@ import (
|
||||
// AlertManager represents integration provider with Prometheus alert manager
|
||||
// https://github.com/prometheus/alertmanager
|
||||
type AlertManager struct {
|
||||
alertURL string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
alertURL string
|
||||
basicAuthUser string
|
||||
basicAuthPass string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
@@ -28,6 +30,9 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(ctx)
|
||||
if am.basicAuthPass != "" {
|
||||
req.SetBasicAuth(am.basicAuthUser, am.basicAuthPass)
|
||||
}
|
||||
resp, err := am.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -38,7 +43,7 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read response from %q: %s", am.alertURL, err)
|
||||
return fmt.Errorf("failed to read response from %q: %w", am.alertURL, err)
|
||||
}
|
||||
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.alertURL, string(body))
|
||||
}
|
||||
@@ -51,10 +56,13 @@ type AlertURLGenerator func(Alert) string
|
||||
const alertManagerPath = "/api/v2/alerts"
|
||||
|
||||
// NewAlertManager is a constructor for AlertManager
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
func NewAlertManager(alertManagerURL, user, pass string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
addr := strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath
|
||||
return &AlertManager{
|
||||
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
alertURL: addr,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
basicAuthUser: user,
|
||||
basicAuthPass: pass,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,12 +11,21 @@ import (
|
||||
)
|
||||
|
||||
func TestAlertManager_Send(t *testing.T) {
|
||||
const baUser, baPass = "foo", "bar"
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
user, pass, ok := r.BasicAuth()
|
||||
if !ok {
|
||||
t.Errorf("unauthorized request")
|
||||
}
|
||||
if user != baUser || pass != baPass {
|
||||
t.Errorf("wrong creds %q:%q; expected %q:%q",
|
||||
user, pass, baUser, baPass)
|
||||
}
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("expected POST method got %s", r.Method)
|
||||
@@ -43,22 +52,22 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
t.Errorf("expected 1 alert in array got %d", len(a))
|
||||
}
|
||||
if a[0].GeneratorURL != "0/0" {
|
||||
t.Errorf("exptected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
t.Errorf("expected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
}
|
||||
if a[0].Labels["alertname"] != "alert0" {
|
||||
t.Errorf("exptected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
t.Errorf("expected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
}
|
||||
if a[0].StartsAt.IsZero() {
|
||||
t.Errorf("exptected non-zero start time")
|
||||
t.Errorf("expected non-zero start time")
|
||||
}
|
||||
if a[0].EndAt.IsZero() {
|
||||
t.Errorf("exptected non-zero end time")
|
||||
t.Errorf("expected non-zero end time")
|
||||
}
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
am := NewAlertManager(srv.URL, func(alert Alert) string {
|
||||
am := NewAlertManager(srv.URL, baUser, baPass, func(alert Alert) string {
|
||||
return strconv.FormatUint(alert.GroupID, 10) + "/" + strconv.FormatUint(alert.ID, 10)
|
||||
}, srv.Client())
|
||||
if err := am.Send(context.Background(), []Alert{{}, {}}); err == nil {
|
||||
|
||||
47
app/vmalert/notifier/init.go
Normal file
47
app/vmalert/notifier/init.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
)
|
||||
|
||||
var (
|
||||
addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -datasource.url")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("notifier.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -notifier.url")
|
||||
tlsCertFile = flagutil.NewArray("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArray("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArray("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flagutil.NewArray("notifier.tlsServerName", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
)
|
||||
|
||||
// Init creates a Notifier object based on provided flags.
|
||||
func Init(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
if len(*addrs) == 0 {
|
||||
flag.PrintDefaults()
|
||||
return nil, fmt.Errorf("at least one `-notifier.url` must be set")
|
||||
}
|
||||
|
||||
var notifiers []Notifier
|
||||
for i, addr := range *addrs {
|
||||
cert, key := tlsCertFile.GetOptionalArg(i), tlsKeyFile.GetOptionalArg(i)
|
||||
ca, serverName := tlsCAFile.GetOptionalArg(i), tlsServerName.GetOptionalArg(i)
|
||||
tr, err := utils.Transport(addr, cert, key, ca, serverName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
user, pass := basicAuthUsername.GetOptionalArg(i), basicAuthPassword.GetOptionalArg(i)
|
||||
am := NewAlertManager(addr, user, pass, gen, &http.Client{Transport: tr})
|
||||
notifiers = append(notifiers, am)
|
||||
}
|
||||
|
||||
return notifiers, nil
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// errGroup collects error messages and exposes them as a single error.
type errGroup struct {
	errs []string
}

// err returns the group itself when it holds at least one message,
// and nil otherwise (including for a nil receiver).
func (eg *errGroup) err() error {
	if eg != nil && len(eg.errs) > 0 {
		return eg
	}
	return nil
}

// Error implements the error interface by joining all collected
// messages with newlines behind an "errors: " prefix.
func (eg *errGroup) Error() string {
	joined := strings.Join(eg.errs, "\n")
	return fmt.Sprintf("errors: %s", joined)
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// RecordingRule is a Rule that supposed
|
||||
@@ -32,6 +33,12 @@ type RecordingRule struct {
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
|
||||
metrics *recordingRuleMetrics
|
||||
}
|
||||
|
||||
type recordingRuleMetrics struct {
|
||||
errors *gauge
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
@@ -45,14 +52,31 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
return rr.RuleID
|
||||
}
|
||||
|
||||
func newRecordingRule(gID uint64, cfg config.Rule) *RecordingRule {
|
||||
return &RecordingRule{
|
||||
func newRecordingRule(group *Group, cfg config.Rule) *RecordingRule {
|
||||
rr := &RecordingRule{
|
||||
RuleID: cfg.ID,
|
||||
Name: cfg.Record,
|
||||
Expr: cfg.Expr,
|
||||
Labels: cfg.Labels,
|
||||
GroupID: gID,
|
||||
GroupID: group.ID(),
|
||||
metrics: &recordingRuleMetrics{},
|
||||
}
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = getOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
rr.mu.Lock()
|
||||
defer rr.mu.Unlock()
|
||||
if rr.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
return rr
|
||||
}
|
||||
|
||||
// Close unregisters rule metrics
|
||||
func (rr *RecordingRule) Close() {
|
||||
metrics.UnregisterMetric(rr.metrics.errors.name)
|
||||
}
|
||||
|
||||
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels")
|
||||
@@ -71,7 +95,7 @@ func (rr *RecordingRule) Exec(ctx context.Context, q datasource.Querier, series
|
||||
rr.lastExecTime = time.Now()
|
||||
rr.lastExecError = err
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %s", rr.Expr, err)
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
||||
}
|
||||
|
||||
duplicates := make(map[uint64]prompbmarshal.TimeSeries, len(qMetrics))
|
||||
|
||||
39
app/vmalert/remoteread/init.go
Normal file
39
app/vmalert/remoteread/init.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package remoteread
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
|
||||
tlsCertFile = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
|
||||
tlsKeyFile = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
|
||||
tlsCAFile = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
|
||||
"By default the server name from -remoteRead.url is used")
|
||||
)
|
||||
|
||||
// Init creates a Querier from provided flag values.
|
||||
// Returns nil if addr flag wasn't set.
|
||||
func Init() (datasource.Querier, error) {
|
||||
if *addr == "" {
|
||||
return nil, nil
|
||||
}
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
c := &http.Client{Transport: tr}
|
||||
return datasource.NewVMStorage(*addr, *basicAuthUsername, *basicAuthPassword, c), nil
|
||||
}
|
||||
54
app/vmalert/remotewrite/init.go
Normal file
54
app/vmalert/remotewrite/init.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
" and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
|
||||
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
|
||||
tlsKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
|
||||
tlsCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
)
|
||||
|
||||
// Init creates Client object from given flags.
|
||||
// Returns nil if addr flag wasn't set.
|
||||
func Init(ctx context.Context) (*Client, error) {
|
||||
if *addr == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
t, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||
}
|
||||
|
||||
return NewClient(ctx, Config{
|
||||
Addr: *addr,
|
||||
Concurrency: *concurrency,
|
||||
MaxQueueSize: *maxQueueSize,
|
||||
MaxBatchSize: *maxBatchSize,
|
||||
FlushInterval: *flushInterval,
|
||||
BasicAuthUser: *basicAuthUsername,
|
||||
BasicAuthPass: *basicAuthPassword,
|
||||
Transport: t,
|
||||
})
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
@@ -61,7 +62,7 @@ const (
|
||||
defaultConcurrency = 4
|
||||
defaultMaxBatchSize = 1e3
|
||||
defaultMaxQueueSize = 1e5
|
||||
defaultFlushInterval = time.Second
|
||||
defaultFlushInterval = 5 * time.Second
|
||||
defaultWriteTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
@@ -85,6 +86,9 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
if cfg.WriteTimeout == 0 {
|
||||
cfg.WriteTimeout = defaultWriteTimeout
|
||||
}
|
||||
if cfg.Transport == nil {
|
||||
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
|
||||
}
|
||||
c := &Client{
|
||||
c: &http.Client{
|
||||
Timeout: cfg.WriteTimeout,
|
||||
@@ -95,6 +99,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
baPass: cfg.BasicAuthPass,
|
||||
flushInterval: cfg.FlushInterval,
|
||||
maxBatchSize: cfg.MaxBatchSize,
|
||||
maxQueueSize: cfg.MaxQueueSize,
|
||||
doneCh: make(chan struct{}),
|
||||
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
@@ -137,14 +142,11 @@ func (c *Client) Close() error {
|
||||
|
||||
func (c *Client) run(ctx context.Context) {
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
wr := prompbmarshal.WriteRequest{}
|
||||
wr := &prompbmarshal.WriteRequest{}
|
||||
shutdown := func() {
|
||||
for ts := range c.input {
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
}
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||
c.flush(lastCtx, wr)
|
||||
cancel()
|
||||
@@ -163,44 +165,82 @@ func (c *Client) run(ctx context.Context) {
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
case ts := <-c.input:
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (c *Client) flush(ctx context.Context, wr prompbmarshal.WriteRequest) {
|
||||
var (
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||
)
|
||||
|
||||
// flush is a blocking function that marshals WriteRequest and sends
|
||||
// it to remote write endpoint. Flush performs limited amount of retries
|
||||
// if request fails.
|
||||
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
defer prompbmarshal.ResetWriteRequest(wr)
|
||||
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||
return
|
||||
}
|
||||
req, err := http.NewRequest("POST", c.addr, bytes.NewReader(snappy.Encode(nil, data)))
|
||||
|
||||
const attempts = 5
|
||||
b := snappy.Encode(nil, data)
|
||||
for i := 0; i < attempts; i++ {
|
||||
err := c.send(ctx, b)
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(b))
|
||||
return
|
||||
}
|
||||
|
||||
logger.Errorf("attempt %d to send request failed: %s", i+1, err)
|
||||
// sleeping to avoid remote db hammering
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
logger.Errorf("all %d attempts to send request failed - dropping %d timeseries",
|
||||
attempts, len(wr.Timeseries))
|
||||
}
|
||||
|
||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
r := bytes.NewReader(data)
|
||||
req, err := http.NewRequest("POST", c.addr, r)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create new HTTP request: %s", err)
|
||||
return
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
if c.baPass != "" {
|
||||
req.SetBasicAuth(c.baUser, c.baPass)
|
||||
}
|
||||
resp, err := c.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
logger.Errorf("error getting response from %s:%s", req.URL, err)
|
||||
return
|
||||
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
||||
req.URL, err, len(data), r.Size())
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusNoContent {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
logger.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
|
||||
return
|
||||
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL, body)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
102
app/vmalert/remotewrite/remotewrite_test.go
Normal file
102
app/vmalert/remotewrite/remotewrite_test.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func TestClient_Push(t *testing.T) {
|
||||
testSrv := newRWServer()
|
||||
cfg := Config{
|
||||
Addr: testSrv.URL,
|
||||
MaxBatchSize: 100,
|
||||
}
|
||||
client, err := NewClient(context.Background(), cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create client: %s", err)
|
||||
}
|
||||
const rowsN = 1e4
|
||||
var sent int
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompbmarshal.TimeSeries{
|
||||
Samples: []prompbmarshal.Sample{{
|
||||
Value: rand.Float64(),
|
||||
Timestamp: time.Now().Unix(),
|
||||
}},
|
||||
}
|
||||
err := client.Push(s)
|
||||
if err == nil {
|
||||
sent++
|
||||
}
|
||||
}
|
||||
if sent == 0 {
|
||||
t.Fatalf("0 series sent")
|
||||
}
|
||||
if err := client.Close(); err != nil {
|
||||
t.Fatalf("failed to close client: %s", err)
|
||||
}
|
||||
got := testSrv.accepted()
|
||||
if got != sent {
|
||||
t.Fatalf("expected to have %d series; got %d", sent, got)
|
||||
}
|
||||
}
|
||||
|
||||
func newRWServer() *rwServer {
|
||||
rw := &rwServer{}
|
||||
rw.Server = httptest.NewServer(http.HandlerFunc(rw.handler))
|
||||
return rw
|
||||
}
|
||||
|
||||
type rwServer struct {
|
||||
// WARN: ordering of fields is important for alignment!
|
||||
// see https://golang.org/pkg/sync/atomic/#pkg-note-BUG
|
||||
acceptedRows uint64
|
||||
*httptest.Server
|
||||
}
|
||||
|
||||
func (rw *rwServer) accepted() int {
|
||||
return int(atomic.LoadUint64(&rw.acceptedRows))
|
||||
}
|
||||
|
||||
func (rw *rwServer) err(w http.ResponseWriter, err error) {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
w.Write([]byte(err.Error()))
|
||||
}
|
||||
|
||||
func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
rw.err(w, fmt.Errorf("bad method %q", r.Method))
|
||||
return
|
||||
}
|
||||
data, err := ioutil.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
rw.err(w, fmt.Errorf("body read err: %w", err))
|
||||
return
|
||||
}
|
||||
defer func() { _ = r.Body.Close() }()
|
||||
|
||||
b, err := snappy.Decode(nil, data)
|
||||
if err != nil {
|
||||
rw.err(w, fmt.Errorf("decode err: %w", err))
|
||||
return
|
||||
}
|
||||
wr := &prompb.WriteRequest{}
|
||||
if err := wr.Unmarshal(b); err != nil {
|
||||
rw.err(w, fmt.Errorf("unmarhsal err: %w", err))
|
||||
return
|
||||
}
|
||||
atomic.AddUint64(&rw.acceptedRows, uint64(len(wr.Timeseries)))
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
@@ -21,4 +21,7 @@ type Rule interface {
|
||||
// UpdateWith performs modification of current Rule
|
||||
// with fields of the given Rule.
|
||||
UpdateWith(Rule) error
|
||||
// Close performs the shutdown procedures for rule
|
||||
// such as metrics unregister
|
||||
Close()
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
|
||||
|
||||
43
app/vmalert/utils/err_group.go
Normal file
43
app/vmalert/utils/err_group.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrGroup accumulates multiple errors
|
||||
// and produces single error message.
|
||||
type ErrGroup struct {
|
||||
errs []error
|
||||
}
|
||||
|
||||
// Add adds a new error to group.
|
||||
// Isn't thread-safe.
|
||||
func (eg *ErrGroup) Add(err error) {
|
||||
eg.errs = append(eg.errs, err)
|
||||
}
|
||||
|
||||
// Err checks if group contains at least
|
||||
// one error.
|
||||
func (eg *ErrGroup) Err() error {
|
||||
if eg == nil || len(eg.errs) == 0 {
|
||||
return nil
|
||||
}
|
||||
return eg
|
||||
}
|
||||
|
||||
// Error satisfies Error interface
|
||||
func (eg *ErrGroup) Error() string {
|
||||
if len(eg.errs) == 0 {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||
for i, err := range eg.errs {
|
||||
b.WriteString(err.Error())
|
||||
if i != len(eg.errs)-1 {
|
||||
b.WriteString("\n")
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
38
app/vmalert/utils/err_group_test.go
Normal file
38
app/vmalert/utils/err_group_test.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestErrGroup(t *testing.T) {
|
||||
testCases := []struct {
|
||||
errs []error
|
||||
exp string
|
||||
}{
|
||||
{nil, ""},
|
||||
{[]error{errors.New("timeout")}, "errors(1): timeout"},
|
||||
{
|
||||
[]error{errors.New("timeout"), errors.New("deadline")},
|
||||
"errors(2): timeout\ndeadline",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
eg := new(ErrGroup)
|
||||
for _, err := range tc.errs {
|
||||
eg.Add(err)
|
||||
}
|
||||
if len(tc.errs) == 0 {
|
||||
if eg.Err() != nil {
|
||||
t.Fatalf("expected to get nil error")
|
||||
}
|
||||
continue
|
||||
}
|
||||
if eg.Err() == nil {
|
||||
t.Fatalf("expected to get non-nil error")
|
||||
}
|
||||
if eg.Error() != tc.exp {
|
||||
t.Fatalf("expected to have: \n%q\ngot:\n%q", tc.exp, eg.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
58
app/vmalert/utils/tls.go
Normal file
58
app/vmalert/utils/tls.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Transport creates http.Transport object based on provided URL.
|
||||
// Returns Transport with TLS configuration if URL contains `https` prefix
|
||||
func Transport(URL, certFile, keyFile, CAFile, serverName string, insecureSkipVerify bool) (*http.Transport, error) {
|
||||
t := http.DefaultTransport.(*http.Transport).Clone()
|
||||
if !strings.HasPrefix(URL, "https") {
|
||||
return t, nil
|
||||
}
|
||||
tlsCfg, err := TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.TLSClientConfig = tlsCfg
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// TLSConfig assembles a tls.Config from the given settings.
//
// certFile/keyFile are loaded as a client key pair only when certFile is
// non-empty. CAFile, when non-empty, is read and parsed as PEM into a
// dedicated root CA pool; otherwise RootCAs is left nil. serverName and
// insecureSkipVerify are copied into the config unchanged.
//
// An error is returned if the key pair cannot be loaded, or if CAFile
// cannot be read or contains no parsable certificates.
func TLSConfig(certFile, keyFile, CAFile, serverName string, insecureSkipVerify bool) (*tls.Config, error) {
	cfg := &tls.Config{
		InsecureSkipVerify: insecureSkipVerify,
		ServerName:         serverName,
	}

	if certFile != "" {
		keyPair, err := tls.LoadX509KeyPair(certFile, keyFile)
		if err != nil {
			return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %w", certFile, keyFile, err)
		}
		cfg.Certificates = []tls.Certificate{keyPair}
	}

	if CAFile != "" {
		caPEM, err := ioutil.ReadFile(CAFile)
		if err != nil {
			return nil, fmt.Errorf("cannot read `ca_file` %q: %w", CAFile, err)
		}
		pool := x509.NewCertPool()
		if ok := pool.AppendCertsFromPEM(caPEM); !ok {
			return nil, fmt.Errorf("cannot parse data from `ca_file` %q", CAFile)
		}
		cfg.RootCAs = pool
	}

	return cfg, nil
}
|
||||
52
app/vmalert/utils/tls_test.go
Normal file
52
app/vmalert/utils/tls_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package utils
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTLSConfig(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
serverName = "test"
|
||||
insecureSkipVerify = true
|
||||
tlsCfg, err := TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tlsCfg == nil {
|
||||
t.Errorf("expected tlsConfig to be set, got nil")
|
||||
}
|
||||
if tlsCfg.ServerName != serverName {
|
||||
t.Errorf("unexpected ServerName, want %s, got %s", serverName, tlsCfg.ServerName)
|
||||
}
|
||||
if tlsCfg.InsecureSkipVerify != insecureSkipVerify {
|
||||
t.Errorf("unexpected InsecureSkipVerify, want %v, got %v", insecureSkipVerify, tlsCfg.InsecureSkipVerify)
|
||||
}
|
||||
certFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected keypair error, got nil")
|
||||
}
|
||||
certFile = ""
|
||||
CAFile = "/path/to/nonexisting/cert/file"
|
||||
_, err = TLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err == nil {
|
||||
t.Errorf("expected read error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransport(t *testing.T) {
|
||||
var certFile, keyFile, CAFile, serverName string
|
||||
var insecureSkipVerify bool
|
||||
URL := "http://victoriametrics.com"
|
||||
_, err := Transport(URL, certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
URL = "https://victoriametrics.com"
|
||||
tr, err := Transport(URL, certFile, keyFile, CAFile, serverName, insecureSkipVerify)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if tr.TLSClientConfig == nil {
|
||||
t.Errorf("expected TLSClientConfig to be set, got nil")
|
||||
}
|
||||
}
|
||||
@@ -27,7 +27,6 @@ var pathList = [][]string{
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
resph := responseHandler{w}
|
||||
switch r.URL.Path {
|
||||
case "/":
|
||||
for _, path := range pathList {
|
||||
@@ -36,10 +35,22 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/api/v1/groups":
|
||||
resph.handle(rh.listGroups())
|
||||
data, err := rh.listGroups()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/api/v1/alerts":
|
||||
resph.handle(rh.listAlerts())
|
||||
data, err := rh.listAlerts()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/-/reload":
|
||||
logger.Infof("api config reload was called, sending sighup")
|
||||
@@ -47,12 +58,18 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
default:
|
||||
if !strings.HasSuffix(r.URL.Path, "/status") {
|
||||
return false
|
||||
}
|
||||
// /api/v1/<groupName>/<alertID>/status
|
||||
if strings.HasSuffix(r.URL.Path, "/status") {
|
||||
resph.handle(rh.alert(r.URL.Path))
|
||||
data, err := rh.alert(r.URL.Path)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +97,7 @@ func (rh *requestHandler) listGroups() ([]byte, error) {
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
@@ -117,7 +134,7 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
@@ -138,11 +155,11 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
|
||||
groupID, err := uint64FromPath(parts[0])
|
||||
if err != nil {
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %s`, err))
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %w`, err))
|
||||
}
|
||||
alertID, err := uint64FromPath(parts[1])
|
||||
if err != nil {
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse alertID: %s`, err))
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse alertID: %w`, err))
|
||||
}
|
||||
resp, err := rh.m.AlertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -151,18 +168,6 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// responseHandler wrapper on http.ResponseWriter with sugar
|
||||
type responseHandler struct{ http.ResponseWriter }
|
||||
|
||||
func (w responseHandler) handle(b []byte, err error) {
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, "%s", err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(b)
|
||||
}
|
||||
|
||||
func uint64FromPath(path string) (uint64, error) {
|
||||
s := strings.TrimRight(path, "/")
|
||||
return strconv.ParseUint(s, 10, 0)
|
||||
|
||||
@@ -58,19 +58,22 @@ run-vmauth:
|
||||
$(MAKE) run-via-docker
|
||||
|
||||
vmauth-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-amd64 ./app/vmauth
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm64 ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-ppc64le ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-386 ./app/vmauth
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmauth-local-with-goarch
|
||||
|
||||
vmauth-local-with-goarch:
|
||||
APP_NAME=vmauth $(MAKE) app-local-with-goarch
|
||||
|
||||
vmauth-pure:
|
||||
APP_NAME=vmauth $(MAKE) app-local-pure
|
||||
|
||||
@@ -64,6 +64,9 @@ users:
|
||||
url_prefix: "http://vminsert:8480/insert/42/prometheus"
|
||||
```
|
||||
|
||||
The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
This may be useful for passing secrets to the config.
|
||||
|
||||
|
||||
### Security
|
||||
|
||||
@@ -110,11 +113,11 @@ Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker i
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
By default the image is built on top of [alpine](https://hub.docker.com/_/alpine) image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmauth
|
||||
ROOT_IMAGE=scratch make package-vmauth
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -82,20 +83,21 @@ var stopCh chan struct{}
|
||||
func readAuthConfig(path string) (map[string]*UserInfo, error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
}
|
||||
m, err := parseAuthConfig(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse %q: %s", path, err)
|
||||
return nil, fmt.Errorf("cannot parse %q: %w", path, err)
|
||||
}
|
||||
logger.Infof("Loaded information about %d users from %q", len(m), path)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
data = envtemplate.Replace(data)
|
||||
var ac AuthConfig
|
||||
if err := yaml.UnmarshalStrict(data, &ac); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %s", err)
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %w", err)
|
||||
}
|
||||
uis := ac.Users
|
||||
if len(uis) == 0 {
|
||||
@@ -115,7 +117,7 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
// Validate urlPrefix
|
||||
target, err := url.Parse(urlPrefix)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid `url_prefix: %q`: %s", urlPrefix, err)
|
||||
return nil, fmt.Errorf("invalid `url_prefix: %q`: %w", urlPrefix, err)
|
||||
}
|
||||
if target.Scheme != "http" && target.Scheme != "https" {
|
||||
return nil, fmt.Errorf("unsupported scheme for `url_prefix: %q`: %q; must be `http` or `https`", urlPrefix, target.Scheme)
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -27,6 +28,7 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
logger.Infof("starting vmauth at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
initAuthConfig()
|
||||
@@ -49,20 +51,21 @@ func main() {
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
username, password, ok := r.BasicAuth()
|
||||
if !ok {
|
||||
httpserver.Errorf(w, "Missing `Authorization: Basic *` header")
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
|
||||
http.Error(w, "missing `Authorization: Basic *` header", http.StatusUnauthorized)
|
||||
return true
|
||||
}
|
||||
ac := authConfig.Load().(map[string]*UserInfo)
|
||||
info := ac[username]
|
||||
if info == nil || info.Password != password {
|
||||
httpserver.Errorf(w, "Cannot find the provided username %q or password in config", username)
|
||||
httpserver.Errorf(w, r, "cannot find the provided username %q or password in config", username)
|
||||
return true
|
||||
}
|
||||
info.requests.Inc()
|
||||
|
||||
targetURL := createTargetURL(info.URLPrefix, r.URL)
|
||||
if _, err := url.Parse(targetURL); err != nil {
|
||||
httpserver.Errorf(w, "Invalid targetURL=%q: %s", targetURL, err)
|
||||
httpserver.Errorf(w, r, "invalid targetURL=%q: %s", targetURL, err)
|
||||
return true
|
||||
}
|
||||
r.Header.Set("vm-target-url", targetURL)
|
||||
|
||||
@@ -51,20 +51,23 @@ package-vmbackup-386:
|
||||
publish-vmbackup:
|
||||
APP_NAME=vmbackup $(MAKE) publish-via-docker
|
||||
|
||||
vmbackup-pure:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-pure
|
||||
|
||||
vmbackup-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-amd64 ./app/vmbackup
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm64 ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-ppc64le ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-386 ./app/vmbackup
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmbackup-local-with-goarch
|
||||
|
||||
vmbackup-local-with-goarch:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-with-goarch
|
||||
|
||||
vmbackup-pure:
|
||||
APP_NAME=vmbackup $(MAKE) app-local-pure
|
||||
|
||||
@@ -152,7 +152,9 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
@@ -161,8 +163,10 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
-memory.allowedBytes int
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-origin string
|
||||
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
|
||||
-snapshot.createURL string
|
||||
@@ -201,9 +205,9 @@ Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` dock
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
By default the image is built on top of [alpine](https://hub.docker.com/_/alpine) image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmbackup
|
||||
ROOT_IMAGE=scratch make package-vmbackup
|
||||
```
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
@@ -36,6 +37,8 @@ func main() {
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
|
||||
if len(*snapshotCreateURL) > 0 {
|
||||
logger.Infof("%s", "Snapshots enabled")
|
||||
@@ -110,12 +113,12 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
// Verify the snapshot exists.
|
||||
f, err := os.Open(snapshotPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot open snapshot at %q: %s", snapshotPath, err)
|
||||
return nil, fmt.Errorf("cannot open snapshot at %q: %w", snapshotPath, err)
|
||||
}
|
||||
fi, err := f.Stat()
|
||||
_ = f.Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot stat %q: %s", snapshotPath, err)
|
||||
return nil, fmt.Errorf("cannot stat %q: %w", snapshotPath, err)
|
||||
}
|
||||
if !fi.IsDir() {
|
||||
return nil, fmt.Errorf("snapshot %q must be a directory", snapshotPath)
|
||||
@@ -126,7 +129,7 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
MaxBytesPerSecond: *maxBytesPerSecond,
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize fs: %s", err)
|
||||
return nil, fmt.Errorf("cannot initialize fs: %w", err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
@@ -134,7 +137,7 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
func newDstFS() (common.RemoteFS, error) {
|
||||
fs, err := actions.NewRemoteFS(*dst)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %s", *dst, err)
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
@@ -145,7 +148,7 @@ func newOriginFS() (common.RemoteFS, error) {
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(*origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %s", *origin, err)
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
@@ -17,11 +18,14 @@ type InsertCtx struct {
|
||||
|
||||
mrs []storage.MetricRow
|
||||
metricNamesBuf []byte
|
||||
|
||||
relabelCtx relabel.Ctx
|
||||
}
|
||||
|
||||
// Reset resets ctx for future fill with rowsLen rows.
|
||||
func (ctx *InsertCtx) Reset(rowsLen int) {
|
||||
for _, label := range ctx.Labels {
|
||||
for i := range ctx.Labels {
|
||||
label := &ctx.Labels[i]
|
||||
label.Name = nil
|
||||
label.Value = nil
|
||||
}
|
||||
@@ -37,6 +41,7 @@ func (ctx *InsertCtx) Reset(rowsLen int) {
|
||||
}
|
||||
ctx.mrs = ctx.mrs[:0]
|
||||
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
|
||||
ctx.relabelCtx.Reset()
|
||||
}
|
||||
|
||||
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
|
||||
@@ -48,23 +53,23 @@ func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label)
|
||||
}
|
||||
|
||||
// WriteDataPoint writes (timestamp, value) with the given prefix and labels into ctx buffer.
|
||||
func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) {
|
||||
func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) error {
|
||||
metricNameRaw := ctx.marshalMetricNameRaw(prefix, labels)
|
||||
ctx.addRow(metricNameRaw, timestamp, value)
|
||||
return ctx.addRow(metricNameRaw, timestamp, value)
|
||||
}
|
||||
|
||||
// WriteDataPointExt writes (timestamp, value) with the given metricNameRaw and labels into ctx buffer.
|
||||
//
|
||||
// It returns metricNameRaw for the given labels if len(metricNameRaw) == 0.
|
||||
func (ctx *InsertCtx) WriteDataPointExt(metricNameRaw []byte, labels []prompb.Label, timestamp int64, value float64) []byte {
|
||||
func (ctx *InsertCtx) WriteDataPointExt(metricNameRaw []byte, labels []prompb.Label, timestamp int64, value float64) ([]byte, error) {
|
||||
if len(metricNameRaw) == 0 {
|
||||
metricNameRaw = ctx.marshalMetricNameRaw(nil, labels)
|
||||
}
|
||||
ctx.addRow(metricNameRaw, timestamp, value)
|
||||
return metricNameRaw
|
||||
err := ctx.addRow(metricNameRaw, timestamp, value)
|
||||
return metricNameRaw, err
|
||||
}
|
||||
|
||||
func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float64) {
|
||||
func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float64) error {
|
||||
mrs := ctx.mrs
|
||||
if cap(mrs) > len(mrs) {
|
||||
mrs = mrs[:len(mrs)+1]
|
||||
@@ -76,55 +81,64 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
|
||||
mr.MetricNameRaw = metricNameRaw
|
||||
mr.Timestamp = timestamp
|
||||
mr.Value = value
|
||||
if len(ctx.metricNamesBuf) > 16*1024*1024 {
|
||||
if err := ctx.FlushBufs(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddLabelBytes adds (name, value) label to ctx.Labels.
|
||||
//
|
||||
// name and value must exist until ctx.Labels is used.
|
||||
func (ctx *InsertCtx) AddLabelBytes(name, value []byte) {
|
||||
labels := ctx.Labels
|
||||
if cap(labels) > len(labels) {
|
||||
labels = labels[:len(labels)+1]
|
||||
} else {
|
||||
labels = append(labels, prompb.Label{})
|
||||
if len(value) == 0 {
|
||||
// Skip labels without values, since they have no sense.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/600
|
||||
// Do not skip labels with empty name, since they are equal to __name__.
|
||||
return
|
||||
}
|
||||
label := &labels[len(labels)-1]
|
||||
|
||||
// Do not copy name and value contents for performance reasons.
|
||||
// This reduces GC overhead on the number of objects and allocations.
|
||||
label.Name = name
|
||||
label.Value = value
|
||||
|
||||
ctx.Labels = labels
|
||||
ctx.Labels = append(ctx.Labels, prompb.Label{
|
||||
// Do not copy name and value contents for performance reasons.
|
||||
// This reduces GC overhead on the number of objects and allocations.
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
// AddLabel adds (name, value) label to ctx.Labels.
|
||||
//
|
||||
// name and value must exist until ctx.Labels is used.
|
||||
func (ctx *InsertCtx) AddLabel(name, value string) {
|
||||
labels := ctx.Labels
|
||||
if cap(labels) > len(labels) {
|
||||
labels = labels[:len(labels)+1]
|
||||
} else {
|
||||
labels = append(labels, prompb.Label{})
|
||||
if len(value) == 0 {
|
||||
// Skip labels without values, since they have no sense.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/600
|
||||
// Do not skip labels with empty name, since they are equal to __name__.
|
||||
return
|
||||
}
|
||||
label := &labels[len(labels)-1]
|
||||
ctx.Labels = append(ctx.Labels, prompb.Label{
|
||||
// Do not copy name and value contents for performance reasons.
|
||||
// This reduces GC overhead on the number of objects and allocations.
|
||||
Name: bytesutil.ToUnsafeBytes(name),
|
||||
Value: bytesutil.ToUnsafeBytes(value),
|
||||
})
|
||||
}
|
||||
|
||||
// Do not copy name and value contents for performance reasons.
|
||||
// This reduces GC overhead on the number of objects and allocations.
|
||||
label.Name = bytesutil.ToUnsafeBytes(name)
|
||||
label.Value = bytesutil.ToUnsafeBytes(value)
|
||||
|
||||
ctx.Labels = labels
|
||||
// ApplyRelabeling applies relabeling to ic.Labels.
|
||||
func (ctx *InsertCtx) ApplyRelabeling() {
|
||||
ctx.Labels = ctx.relabelCtx.ApplyRelabeling(ctx.Labels)
|
||||
}
|
||||
|
||||
// FlushBufs flushes buffered rows to the underlying storage.
|
||||
func (ctx *InsertCtx) FlushBufs() error {
|
||||
if err := vmstorage.AddRows(ctx.mrs); err != nil {
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot store metrics: %s", err),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
err := vmstorage.AddRows(ctx.mrs)
|
||||
ctx.Reset(0)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot store metrics: %w", err),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -28,6 +29,7 @@ func insertRows(rows []parser.Row) error {
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
ctx.Reset(len(rows))
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
@@ -36,7 +38,16 @@ func insertRows(rows []parser.Row) error {
|
||||
tag := &r.Tags[j]
|
||||
ctx.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"io"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -28,6 +29,7 @@ func insertRows(rows []parser.Row) error {
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
ctx.Reset(len(rows))
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
@@ -36,7 +38,16 @@ func insertRows(rows []parser.Row) error {
|
||||
tag := &r.Tags[j]
|
||||
ctx.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
|
||||
@@ -8,7 +8,9 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
@@ -18,6 +20,7 @@ import (
|
||||
var (
|
||||
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
|
||||
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
|
||||
skipMeasurement = flag.Bool("influxSkipMeasurement", false, "Uses '{field_name}' as a metric name while ignoring '{measurement}' and '-influxMeasurementFieldSeparator'")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -59,36 +62,68 @@ func insertRows(db string, rows []parser.Row) error {
|
||||
ic := &ctx.Common
|
||||
ic.Reset(rowsLen)
|
||||
rowsTotal := 0
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ic.Labels = ic.Labels[:0]
|
||||
hasDBLabel := false
|
||||
hasDBKey := false
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
if tag.Key == "db" {
|
||||
hasDBLabel = true
|
||||
hasDBKey = true
|
||||
}
|
||||
ic.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
if len(db) > 0 && !hasDBLabel {
|
||||
if !hasDBKey {
|
||||
ic.AddLabel("db", db)
|
||||
}
|
||||
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
if !*skipMeasurement {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, r.Measurement...)
|
||||
}
|
||||
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
|
||||
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
|
||||
}
|
||||
metricGroupPrefixLen := len(ctx.metricGroupBuf)
|
||||
for j := range r.Fields {
|
||||
f := &r.Fields[j]
|
||||
if !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
|
||||
if hasRelabeling {
|
||||
ctx.originLabels = append(ctx.originLabels[:0], ic.Labels...)
|
||||
for j := range r.Fields {
|
||||
f := &r.Fields[j]
|
||||
if !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
|
||||
}
|
||||
metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
|
||||
ic.Labels = append(ic.Labels[:0], ctx.originLabels...)
|
||||
ic.AddLabel("", metricGroup)
|
||||
ic.ApplyRelabeling()
|
||||
if len(ic.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, f.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
|
||||
labelsLen := len(ic.Labels)
|
||||
for j := range r.Fields {
|
||||
f := &r.Fields[j]
|
||||
if !skipFieldKey {
|
||||
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
|
||||
}
|
||||
metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
|
||||
ic.Labels = ic.Labels[:labelsLen]
|
||||
ic.AddLabel("", metricGroup)
|
||||
if len(ic.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ic.WriteDataPoint(ctx.metricNameBuf, ic.Labels[len(ic.Labels)-1:], r.Timestamp, f.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
|
||||
ic.Labels = ic.Labels[:0]
|
||||
ic.AddLabel("", metricGroup)
|
||||
ic.WriteDataPoint(ctx.metricNameBuf, ic.Labels[:1], r.Timestamp, f.Value)
|
||||
}
|
||||
rowsTotal += len(r.Fields)
|
||||
}
|
||||
@@ -101,12 +136,21 @@ type pushCtx struct {
|
||||
Common common.InsertCtx
|
||||
metricNameBuf []byte
|
||||
metricGroupBuf []byte
|
||||
originLabels []prompb.Label
|
||||
}
|
||||
|
||||
// reset returns ctx to an empty state: the embedded Common context is reset with
// zero rows and all the per-request buffers are truncated to zero length.
func (ctx *pushCtx) reset() {
|
||||
ctx.Common.Reset(0)
|
||||
ctx.metricNameBuf = ctx.metricNameBuf[:0]
|
||||
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
|
||||
|
||||
// Clear Name and Value of every saved label before truncating the slice.
// NOTE(review): presumably this is done so the retained backing array stops
// referencing the byte slices of the previous request - confirm.
originLabels := ctx.originLabels
|
||||
for i := range originLabels {
|
||||
label := &originLabels[i]
|
||||
label.Name = nil
|
||||
label.Value = nil
|
||||
}
|
||||
ctx.originLabels = ctx.originLabels[:0]
|
||||
}
|
||||
|
||||
func getPushCtx() *pushCtx {
|
||||
|
||||
@@ -12,8 +12,10 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheusimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prompush"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
|
||||
@@ -29,7 +31,8 @@ import (
|
||||
|
||||
var (
|
||||
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty")
|
||||
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty. "+
|
||||
"This flag isn't needed when ingesting data over HTTP - just send it to `http://<victoriametrics>:8428/write`")
|
||||
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty")
|
||||
@@ -46,6 +49,7 @@ var (
|
||||
|
||||
// Init initializes vminsert.
|
||||
func Init() {
|
||||
relabel.Init()
|
||||
storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
|
||||
|
||||
writeconcurrencylimiter.Init()
|
||||
@@ -89,7 +93,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -98,7 +102,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
vmimportRequests.Inc()
|
||||
if err := vmimport.InsertHandler(r); err != nil {
|
||||
vmimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -107,7 +111,16 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
csvimportRequests.Inc()
|
||||
if err := csvimport.InsertHandler(r); err != nil {
|
||||
csvimportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/api/v1/import/prometheus":
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -116,7 +129,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
influxWriteRequests.Inc()
|
||||
if err := influx.InsertHandlerForHTTP(r); err != nil {
|
||||
influxWriteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -153,6 +166,9 @@ var (
|
||||
csvimportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
csvimportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/import/csv", protocol="csvimport"}`)
|
||||
|
||||
prometheusimportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
prometheusimportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/import/prometheus", protocol="prometheusimport"}`)
|
||||
|
||||
influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
|
||||
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"io"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -28,6 +29,7 @@ func insertRows(rows []parser.Row) error {
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
ctx.Reset(len(rows))
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
@@ -36,7 +38,16 @@ func insertRows(rows []parser.Row) error {
|
||||
tag := &r.Tags[j]
|
||||
ctx.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -34,6 +35,7 @@ func insertRows(rows []parser.Row) error {
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
ctx.Reset(len(rows))
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
@@ -42,7 +44,16 @@ func insertRows(rows []parser.Row) error {
|
||||
tag := &r.Tags[j]
|
||||
ctx.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
|
||||
54
app/vminsert/prometheusimport/request_handler.go
Normal file
54
app/vminsert/prometheusimport/request_handler.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package prometheusimport
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// Metrics updated by insertRows for data ingested in Prometheus text format.
var (
|
||||
// rowsInserted is increased by the number of rows passed to every insertRows call.
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
|
||||
// rowsPerInsert is updated with the number of rows passed to every insertRows call.
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="prometheus"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes `/api/v1/import/prometheus` request.
//
// The request body is passed to parser.ParseStream with insertRows as the callback
// for parsed rows. The whole call runs inside writeconcurrencylimiter.Do and its
// error is returned to the caller as is.
|
||||
func InsertHandler(req *http.Request) error {
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
// The body is treated as gzip-compressed only when Content-Encoding is exactly "gzip".
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, insertRows)
|
||||
})
|
||||
}
|
||||
|
||||
// insertRows converts the parsed rows into data points and flushes them via ctx.FlushBufs.
//
// For every row the label set is built from the metric name and the row tags,
// global relabeling is applied if it is configured, and the resulting data point
// is written to the insert context. Rows left without labels are skipped.
// The first error from WriteDataPoint or FlushBufs is returned.
func insertRows(rows []parser.Row) error {
|
||||
// The insert context is obtained from common and handed back when the function returns.
ctx := common.GetInsertCtx()
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
ctx.Reset(len(rows))
|
||||
// Check for relabeling rules once per call instead of once per row.
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
// The metric name is stored as a label with an empty name.
ctx.AddLabel("", r.Metric)
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
ctx.AddLabel(tag.Key, tag.Value)
|
||||
}
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// NOTE(review): both metrics use len(rows), so rows skipped above are counted too.
rowsInserted.Add(len(rows))
|
||||
rowsPerInsert.Update(float64(len(rows)))
|
||||
return ctx.FlushBufs()
|
||||
}
|
||||
@@ -1,13 +1,8 @@
|
||||
package prompush
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -21,8 +16,8 @@ const maxRowsPerBlock = 10000
|
||||
|
||||
// Push pushes wr to storage.
|
||||
func Push(wr *prompbmarshal.WriteRequest) {
|
||||
ctx := getPushCtx()
|
||||
defer putPushCtx(ctx)
|
||||
ctx := common.GetInsertCtx()
|
||||
defer common.PutInsertCtx(ctx)
|
||||
|
||||
tss := wr.Timeseries
|
||||
for len(tss) > 0 {
|
||||
@@ -34,80 +29,44 @@ func Push(wr *prompbmarshal.WriteRequest) {
|
||||
} else {
|
||||
tss = nil
|
||||
}
|
||||
ctx.push(tssBlock)
|
||||
push(ctx, tssBlock)
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *pushCtx) push(tss []prompbmarshal.TimeSeries) {
|
||||
func push(ctx *common.InsertCtx, tss []prompbmarshal.TimeSeries) {
|
||||
rowsLen := 0
|
||||
for i := range tss {
|
||||
rowsLen += len(tss[i].Samples)
|
||||
}
|
||||
ic := &ctx.Common
|
||||
ic.Reset(rowsLen)
|
||||
ctx.Reset(rowsLen)
|
||||
rowsTotal := 0
|
||||
labels := ctx.labels[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
labels = labels[:0]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
for j := range ts.Labels {
|
||||
label := &ts.Labels[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: bytesutil.ToUnsafeBytes(label.Name),
|
||||
Value: bytesutil.ToUnsafeBytes(label.Value),
|
||||
})
|
||||
ctx.AddLabel(label.Name, label.Value)
|
||||
}
|
||||
ctx.ApplyRelabeling()
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
var metricNameRaw []byte
|
||||
var err error
|
||||
for i := range ts.Samples {
|
||||
r := &ts.Samples[i]
|
||||
metricNameRaw = ic.WriteDataPointExt(metricNameRaw, labels, r.Timestamp, r.Value)
|
||||
metricNameRaw, err = ctx.WriteDataPointExt(metricNameRaw, ctx.Labels, r.Timestamp, r.Value)
|
||||
if err != nil {
|
||||
logger.Errorf("cannot write promscape data to storage: %s", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
rowsTotal += len(ts.Samples)
|
||||
}
|
||||
ctx.labels = labels
|
||||
rowsInserted.Add(rowsTotal)
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
if err := ic.FlushBufs(); err != nil {
|
||||
if err := ctx.FlushBufs(); err != nil {
|
||||
logger.Errorf("cannot flush promscrape data to storage: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
type pushCtx struct {
|
||||
Common common.InsertCtx
|
||||
labels []prompb.Label
|
||||
}
|
||||
|
||||
func (ctx *pushCtx) reset() {
|
||||
ctx.Common.Reset(0)
|
||||
|
||||
for i := range ctx.labels {
|
||||
label := &ctx.labels[i]
|
||||
label.Name = nil
|
||||
label.Value = nil
|
||||
}
|
||||
ctx.labels = ctx.labels[:0]
|
||||
}
|
||||
|
||||
func getPushCtx() *pushCtx {
|
||||
select {
|
||||
case ctx := <-pushCtxPoolCh:
|
||||
return ctx
|
||||
default:
|
||||
if v := pushCtxPool.Get(); v != nil {
|
||||
return v.(*pushCtx)
|
||||
}
|
||||
return &pushCtx{}
|
||||
}
|
||||
}
|
||||
|
||||
func putPushCtx(ctx *pushCtx) {
|
||||
ctx.reset()
|
||||
select {
|
||||
case pushCtxPoolCh <- ctx:
|
||||
default:
|
||||
pushCtxPool.Put(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
var pushCtxPool sync.Pool
|
||||
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
@@ -32,12 +33,29 @@ func insertRows(timeseries []prompb.TimeSeries) error {
|
||||
}
|
||||
ctx.Reset(rowsLen)
|
||||
rowsTotal := 0
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range timeseries {
|
||||
ts := ×eries[i]
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
srcLabels := ts.Labels
|
||||
for _, srcLabel := range srcLabels {
|
||||
ctx.AddLabelBytes(srcLabel.Name, srcLabel.Value)
|
||||
}
|
||||
if hasRelabeling {
|
||||
ctx.ApplyRelabeling()
|
||||
}
|
||||
if len(ctx.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
var metricNameRaw []byte
|
||||
var err error
|
||||
for i := range ts.Samples {
|
||||
r := &ts.Samples[i]
|
||||
metricNameRaw = ctx.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
|
||||
metricNameRaw, err = ctx.WriteDataPointExt(metricNameRaw, ctx.Labels, r.Timestamp, r.Value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsTotal += len(ts.Samples)
|
||||
}
|
||||
|
||||
127
app/vminsert/relabel/relabel.go
Normal file
127
app/vminsert/relabel/relabel.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package relabel
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// relabelConfig is the value of the -relabelConfig command-line flag.
// An empty value means that no relabeling config is loaded (see loadRelabelConfig).
var relabelConfig = flag.String("relabelConfig", "", "Optional path to a file with relabeling rules, which are applied to all the ingested metrics. "+
|
||||
"See https://victoriametrics.github.io/#relabeling for details")
|
||||
|
||||
// Init must be called after flag.Parse and before using the relabel package.
//
// It loads the relabeling rules referred by -relabelConfig and publishes them in prcsGlobal.
// A load failure at this point is reported via logger.Fatalf.
// If -relabelConfig is set, a goroutine is started, which reloads the rules
// every time a value arrives on the SIGHUP channel.
|
||||
func Init() {
|
||||
prcs, err := loadRelabelConfig()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot load relabelConfig: %s", err)
|
||||
}
|
||||
// The store happens even when the flag is empty, so prcsGlobal always holds
// a *[]promrelabel.ParsedRelabelConfig after Init returns.
prcsGlobal.Store(&prcs)
|
||||
if len(*relabelConfig) == 0 {
|
||||
// There is no config file, so there is nothing to reload on SIGHUP.
return
|
||||
}
|
||||
sighupCh := procutil.NewSighupChan()
|
||||
go func() {
|
||||
for range sighupCh {
|
||||
logger.Infof("received SIGHUP; reloading -relabelConfig=%q...", *relabelConfig)
|
||||
prcs, err := loadRelabelConfig()
|
||||
if err != nil {
|
||||
// A failed reload is logged and skipped; the previously stored rules stay in use.
logger.Errorf("cannot load the updated relabelConfig: %s; preserving the previous config", err)
|
||||
continue
|
||||
}
|
||||
prcsGlobal.Store(&prcs)
|
||||
logger.Infof("successfully reloaded -relabelConfig=%q", *relabelConfig)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// prcsGlobal holds a *[]promrelabel.ParsedRelabelConfig with the current relabeling rules.
// It is stored by Init and by the SIGHUP reload loop, and loaded by HasRelabeling and ApplyRelabeling.
var prcsGlobal atomic.Value
|
||||
|
||||
// loadRelabelConfig loads relabeling rules from the file referred by -relabelConfig.
//
// It returns nil rules and nil error if the flag is empty.
// A loading error is wrapped with the flag value and returned.
func loadRelabelConfig() ([]promrelabel.ParsedRelabelConfig, error) {
|
||||
if len(*relabelConfig) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
prcs, err := promrelabel.LoadRelabelConfigs(*relabelConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when reading -relabelConfig=%q: %w", *relabelConfig, err)
|
||||
}
|
||||
return prcs, nil
|
||||
}
|
||||
|
||||
// HasRelabeling returns true if there is global relabeling.
//
// It reports whether the rules currently stored in prcsGlobal are non-empty.
// Init must be called first: the type assertion below panics while prcsGlobal is empty.
|
||||
func HasRelabeling() bool {
|
||||
prcs := prcsGlobal.Load().(*[]promrelabel.ParsedRelabelConfig)
|
||||
return len(*prcs) > 0
|
||||
}
|
||||
|
||||
// Ctx holds relabeling context.
//
// It keeps a scratch slice of labels, which is reused across ApplyRelabeling calls.
|
||||
type Ctx struct {
|
||||
// tmpLabels is used during ApplyRelabeling call.
// It holds the labels in prompbmarshal.Label form and is cleared by Reset.
|
||||
tmpLabels []prompbmarshal.Label
|
||||
}
|
||||
|
||||
// Reset resets ctx.
//
// Name and Value of every label in tmpLabels are set to empty strings
// and then tmpLabels is truncated to zero length, keeping its capacity.
|
||||
func (ctx *Ctx) Reset() {
|
||||
labels := ctx.tmpLabels
|
||||
for i := range labels {
|
||||
label := &labels[i]
|
||||
label.Name = ""
|
||||
label.Value = ""
|
||||
}
|
||||
ctx.tmpLabels = ctx.tmpLabels[:0]
|
||||
}
|
||||
|
||||
// ApplyRelabeling applies relabeling to the given labels and returns the result.
|
||||
//
|
||||
// The returned labels are valid until the next call to ApplyRelabeling.
// The result is built in labels[:0], so the contents of the passed slice are overwritten.
// The passed labels are returned unchanged if there are no relabeling rules.
// Init must be called first: the type assertion below panics while prcsGlobal is empty.
|
||||
func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
|
||||
prcs := prcsGlobal.Load().(*[]promrelabel.ParsedRelabelConfig)
|
||||
if len(*prcs) == 0 {
|
||||
// There are no relabeling rules.
|
||||
return labels
|
||||
}
|
||||
// Convert labels to prompbmarshal.Label format suitable for relabeling.
// A label with an empty name is passed to relabeling under the "__name__" name.
|
||||
tmpLabels := ctx.tmpLabels[:0]
|
||||
for _, label := range labels {
|
||||
name := bytesutil.ToUnsafeString(label.Name)
|
||||
if len(name) == 0 {
|
||||
name = "__name__"
|
||||
}
|
||||
value := bytesutil.ToUnsafeString(label.Value)
|
||||
tmpLabels = append(tmpLabels, prompbmarshal.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
// Apply relabeling
// The result is saved in ctx.tmpLabels, so its storage is reused by the next call.
|
||||
tmpLabels = promrelabel.ApplyRelabelConfigs(tmpLabels, 0, *prcs, true)
|
||||
ctx.tmpLabels = tmpLabels
|
||||
if len(tmpLabels) == 0 {
|
||||
// No labels are left after relabeling; count the metric as dropped.
metricsDropped.Inc()
|
||||
}
|
||||
|
||||
// Return back labels to the desired format.
// The "__name__" label is converted back to a label with nil name.
|
||||
dst := labels[:0]
|
||||
for _, label := range tmpLabels {
|
||||
name := bytesutil.ToUnsafeBytes(label.Name)
|
||||
if label.Name == "__name__" {
|
||||
name = nil
|
||||
}
|
||||
value := bytesutil.ToUnsafeBytes(label.Value)
|
||||
dst = append(dst, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// metricsDropped is incremented by ApplyRelabeling when relabeling leaves a metric without labels.
var metricsDropped = metrics.NewCounter(`vm_relabel_metrics_dropped_total`)
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
@@ -37,6 +38,7 @@ func insertRows(rows []parser.Row) error {
|
||||
ic := &ctx.Common
|
||||
ic.Reset(rowsLen)
|
||||
rowsTotal := 0
|
||||
hasRelabeling := relabel.HasRelabeling()
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
ic.Labels = ic.Labels[:0]
|
||||
@@ -44,13 +46,22 @@ func insertRows(rows []parser.Row) error {
|
||||
tag := &r.Tags[j]
|
||||
ic.AddLabelBytes(tag.Key, tag.Value)
|
||||
}
|
||||
if hasRelabeling {
|
||||
ic.ApplyRelabeling()
|
||||
}
|
||||
if len(ic.Labels) == 0 {
|
||||
// Skip metric without labels.
|
||||
continue
|
||||
}
|
||||
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
|
||||
values := r.Values
|
||||
timestamps := r.Timestamps
|
||||
_ = timestamps[len(values)-1]
|
||||
for j, value := range values {
|
||||
timestamp := timestamps[j]
|
||||
ic.WriteDataPoint(ctx.metricNameBuf, nil, timestamp, value)
|
||||
if err := ic.WriteDataPoint(ctx.metricNameBuf, nil, timestamp, value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rowsTotal += len(values)
|
||||
}
|
||||
|
||||
@@ -51,20 +51,23 @@ package-vmrestore-386:
|
||||
publish-vmrestore:
|
||||
APP_NAME=vmrestore $(MAKE) publish-via-docker
|
||||
|
||||
vmrestore-pure:
|
||||
APP_NAME=vmrestore $(MAKE) app-local-pure
|
||||
|
||||
vmrestore-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-amd64 ./app/vmrestore
|
||||
CGO_ENABLED=1 GOARCH=amd64 $(MAKE) vmrestore-local-with-goarch
|
||||
|
||||
vmrestore-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm ./app/vmrestore
|
||||
CGO_ENABLED=0 GOARCH=arm $(MAKE) vmrestore-local-with-goarch
|
||||
|
||||
vmrestore-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm64 ./app/vmrestore
|
||||
CGO_ENABLED=0 GOARCH=arm64 $(MAKE) vmrestore-local-with-goarch
|
||||
|
||||
vmrestore-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-ppc64le ./app/vmrestore
|
||||
CGO_ENABLED=0 GOARCH=ppc64le $(MAKE) vmrestore-local-with-goarch
|
||||
|
||||
vmrestore-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-386 ./app/vmrestore
|
||||
CGO_ENABLED=0 GOARCH=386 $(MAKE) vmrestore-local-with-goarch
|
||||
|
||||
vmrestore-local-with-goarch:
|
||||
APP_NAME=vmrestore $(MAKE) app-local-with-goarch
|
||||
|
||||
vmrestore-pure:
|
||||
APP_NAME=vmrestore $(MAKE) app-local-pure
|
||||
|
||||
@@ -53,7 +53,9 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot data files bigger than 2^32 bytes in memory
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit (default 10)
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
@@ -62,15 +64,17 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum download speed. There is no limit if it is set to 0
|
||||
-memory.allowedBytes int
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to non-zero value. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-skipBackupCompleteCheck
|
||||
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-storageDataPath string
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
|
||||
-version
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
@@ -98,9 +102,9 @@ Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` do
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
By default the image is built on top of [alpine](https://hub.docker.com/_/alpine) image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmrestore
|
||||
ROOT_IMAGE=scratch make package-vmrestore
|
||||
```
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
@@ -30,6 +31,8 @@ func main() {
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
cgroup.UpdateGOMAXPROCSToCPUQuota()
|
||||
|
||||
srcFS, err := newSrcFS()
|
||||
if err != nil {
|
||||
@@ -71,7 +74,7 @@ func newDstFS() (*fslocal.FS, error) {
|
||||
MaxBytesPerSecond: *maxBytesPerSecond,
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize local fs: %s", err)
|
||||
return nil, fmt.Errorf("cannot initialize local fs: %w", err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
@@ -79,7 +82,7 @@ func newDstFS() (*fslocal.FS, error) {
|
||||
func newSrcFS() (common.RemoteFS, error) {
|
||||
fs, err := actions.NewRemoteFS(*src)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-src`=%q: %s", *src, err)
|
||||
return nil, fmt.Errorf("cannot parse `-src`=%q: %w", *src, err)
|
||||
}
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package vmselect
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
@@ -89,7 +90,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
*maxConcurrentRequests, *maxQueueDuration),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, "%s", err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -175,18 +176,22 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
return true
|
||||
case "/api/v1/status/tsdb":
|
||||
tsdbStatusRequests.Inc()
|
||||
statusTSDBRequests.Inc()
|
||||
if err := prometheus.TSDBStatusHandler(startTime, w, r); err != nil {
|
||||
tsdbStatusErrors.Inc()
|
||||
statusTSDBErrors.Inc()
|
||||
sendPrometheusError(w, r, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/status/active_queries":
|
||||
statusActiveQueriesRequests.Inc()
|
||||
promql.WriteActiveQueries(w)
|
||||
return true
|
||||
case "/api/v1/export":
|
||||
exportRequests.Inc()
|
||||
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
|
||||
exportErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
@@ -194,7 +199,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
federateRequests.Inc()
|
||||
if err := prometheus.FederateHandler(startTime, w, r); err != nil {
|
||||
federateErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
@@ -220,12 +225,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
deleteRequests.Inc()
|
||||
authKey := r.FormValue("authKey")
|
||||
if authKey != *deleteAuthKey {
|
||||
httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
|
||||
httpserver.Errorf(w, r, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
|
||||
return true
|
||||
}
|
||||
if err := prometheus.DeleteHandler(startTime, r); err != nil {
|
||||
deleteErrors.Inc()
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
@@ -240,7 +245,8 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
statusCode := http.StatusUnprocessableEntity
|
||||
if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
|
||||
var esc *httpserver.ErrorWithStatusCode
|
||||
if errors.As(err, &esc) {
|
||||
statusCode = esc.StatusCode
|
||||
}
|
||||
w.WriteHeader(statusCode)
|
||||
@@ -269,8 +275,10 @@ var (
|
||||
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
|
||||
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)
|
||||
|
||||
tsdbStatusRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/tsdb"}`)
|
||||
tsdbStatusErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/tsdb"}`)
|
||||
statusTSDBRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/tsdb"}`)
|
||||
statusTSDBErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/tsdb"}`)
|
||||
|
||||
statusActiveQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/active_queries"}`)
|
||||
|
||||
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
|
||||
@@ -2,6 +2,7 @@ package netstorage
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"runtime"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -71,7 +73,7 @@ func (rss *Results) mustClose() {
|
||||
rss.sr = nil
|
||||
}
|
||||
|
||||
var timeseriesWorkCh = make(chan *timeseriesWork, gomaxprocs)
|
||||
var timeseriesWorkCh = make(chan *timeseriesWork, gomaxprocs*16)
|
||||
|
||||
type timeseriesWork struct {
|
||||
rss *Results
|
||||
@@ -93,12 +95,12 @@ func timeseriesWorker(workerID uint) {
|
||||
var rsLastResetTime uint64
|
||||
for tsw := range timeseriesWorkCh {
|
||||
rss := tsw.rss
|
||||
if time.Until(rss.deadline.Deadline) < 0 {
|
||||
if rss.deadline.Exceeded() {
|
||||
tsw.doneCh <- fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
|
||||
continue
|
||||
}
|
||||
if err := tsw.pts.Unpack(&rs, rss.tr, rss.fetchData); err != nil {
|
||||
tsw.doneCh <- fmt.Errorf("error during time series unpacking: %s", err)
|
||||
tsw.doneCh <- fmt.Errorf("error during time series unpacking: %w", err)
|
||||
continue
|
||||
}
|
||||
if len(rs.Timestamps) > 0 || !rss.fetchData {
|
||||
@@ -115,7 +117,7 @@ func timeseriesWorker(workerID uint) {
|
||||
}
|
||||
}
|
||||
|
||||
// RunParallel runs in parallel f for all the results from rss.
|
||||
// RunParallel runs f in parallel for all the results from rss.
|
||||
//
|
||||
// f shouldn't hold references to rs after returning.
|
||||
// workerID is the id of the worker goroutine that calls f.
|
||||
@@ -166,16 +168,69 @@ type packedTimeseries struct {
|
||||
brs []storage.BlockRef
|
||||
}
|
||||
|
||||
var unpackWorkCh = make(chan *unpackWork, gomaxprocs)
|
||||
var unpackWorkCh = make(chan *unpackWork, gomaxprocs*128)
|
||||
|
||||
type unpackWorkItem struct {
|
||||
br storage.BlockRef
|
||||
tr storage.TimeRange
|
||||
}
|
||||
|
||||
type unpackWork struct {
|
||||
br storage.BlockRef
|
||||
tr storage.TimeRange
|
||||
ws []unpackWorkItem
|
||||
fetchData bool
|
||||
sbs []*sortBlock
|
||||
doneCh chan error
|
||||
sb *sortBlock
|
||||
}
|
||||
|
||||
func (upw *unpackWork) reset() {
|
||||
ws := upw.ws
|
||||
for i := range ws {
|
||||
w := &ws[i]
|
||||
w.br = storage.BlockRef{}
|
||||
w.tr = storage.TimeRange{}
|
||||
}
|
||||
upw.ws = upw.ws[:0]
|
||||
upw.fetchData = false
|
||||
sbs := upw.sbs
|
||||
for i := range sbs {
|
||||
sbs[i] = nil
|
||||
}
|
||||
upw.sbs = upw.sbs[:0]
|
||||
if n := len(upw.doneCh); n > 0 {
|
||||
logger.Panicf("BUG: upw.doneCh must be empty; it contains %d items now", n)
|
||||
}
|
||||
}
|
||||
|
||||
func (upw *unpackWork) unpack() {
|
||||
for _, w := range upw.ws {
|
||||
sb := getSortBlock()
|
||||
if err := sb.unpackFrom(w.br, w.tr, upw.fetchData); err != nil {
|
||||
putSortBlock(sb)
|
||||
upw.doneCh <- fmt.Errorf("cannot unpack block: %w", err)
|
||||
return
|
||||
}
|
||||
upw.sbs = append(upw.sbs, sb)
|
||||
}
|
||||
upw.doneCh <- nil
|
||||
}
|
||||
|
||||
func getUnpackWork() *unpackWork {
|
||||
v := unpackWorkPool.Get()
|
||||
if v != nil {
|
||||
return v.(*unpackWork)
|
||||
}
|
||||
return &unpackWork{
|
||||
doneCh: make(chan error, 1),
|
||||
}
|
||||
}
|
||||
|
||||
func putUnpackWork(upw *unpackWork) {
|
||||
upw.reset()
|
||||
unpackWorkPool.Put(upw)
|
||||
}
|
||||
|
||||
var unpackWorkPool sync.Pool
|
||||
|
||||
func init() {
|
||||
for i := 0; i < gomaxprocs; i++ {
|
||||
go unpackWorker()
|
||||
@@ -184,37 +239,41 @@ func init() {
|
||||
|
||||
func unpackWorker() {
|
||||
for upw := range unpackWorkCh {
|
||||
sb := getSortBlock()
|
||||
if err := sb.unpackFrom(upw.br, upw.tr, upw.fetchData); err != nil {
|
||||
putSortBlock(sb)
|
||||
upw.doneCh <- fmt.Errorf("cannot unpack block: %s", err)
|
||||
continue
|
||||
}
|
||||
upw.sb = sb
|
||||
upw.doneCh <- nil
|
||||
upw.unpack()
|
||||
}
|
||||
}
|
||||
|
||||
// unpackBatchSize is the maximum number of blocks that may be unpacked at once by a single goroutine.
|
||||
//
|
||||
// This batch is needed in order to reduce contention for unpackWorkCh on multi-CPU systems.
|
||||
var unpackBatchSize = 8 * runtime.GOMAXPROCS(-1)
|
||||
|
||||
// Unpack unpacks pts to dst.
|
||||
func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData bool) error {
|
||||
dst.reset()
|
||||
|
||||
if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal metricName %q: %s", pts.metricName, err)
|
||||
return fmt.Errorf("cannot unmarshal metricName %q: %w", pts.metricName, err)
|
||||
}
|
||||
|
||||
// Feed workers with work
|
||||
upws := make([]*unpackWork, len(pts.brs))
|
||||
for i, br := range pts.brs {
|
||||
upw := &unpackWork{
|
||||
br: br,
|
||||
tr: tr,
|
||||
fetchData: fetchData,
|
||||
doneCh: make(chan error, 1),
|
||||
upws := make([]*unpackWork, 0, 1+len(pts.brs)/unpackBatchSize)
|
||||
upw := getUnpackWork()
|
||||
upw.fetchData = fetchData
|
||||
for _, br := range pts.brs {
|
||||
if len(upw.ws) >= unpackBatchSize {
|
||||
unpackWorkCh <- upw
|
||||
upws = append(upws, upw)
|
||||
upw = getUnpackWork()
|
||||
upw.fetchData = fetchData
|
||||
}
|
||||
unpackWorkCh <- upw
|
||||
upws[i] = upw
|
||||
upw.ws = append(upw.ws, unpackWorkItem{
|
||||
br: br,
|
||||
tr: tr,
|
||||
})
|
||||
}
|
||||
unpackWorkCh <- upw
|
||||
upws = append(upws, upw)
|
||||
pts.brs = pts.brs[:0]
|
||||
|
||||
// Wait until work is complete
|
||||
@@ -226,10 +285,13 @@ func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData
|
||||
firstErr = err
|
||||
}
|
||||
if firstErr == nil {
|
||||
sbs = append(sbs, upw.sb)
|
||||
sbs = append(sbs, upw.sbs...)
|
||||
} else {
|
||||
putSortBlock(upw.sb)
|
||||
for _, sb := range upw.sbs {
|
||||
putSortBlock(sb)
|
||||
}
|
||||
}
|
||||
putUnpackWork(upw)
|
||||
}
|
||||
if firstErr != nil {
|
||||
return firstErr
|
||||
@@ -329,7 +391,7 @@ func (sb *sortBlock) unpackFrom(br storage.BlockRef, tr storage.TimeRange, fetch
|
||||
br.MustReadBlock(&sb.b, fetchData)
|
||||
if fetchData {
|
||||
if err := sb.b.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal block: %s", err)
|
||||
return fmt.Errorf("cannot unmarshal block: %w", err)
|
||||
}
|
||||
}
|
||||
timestamps := sb.b.Timestamps()
|
||||
@@ -396,9 +458,12 @@ func DeleteSeries(sq *storage.SearchQuery) (int, error) {
|
||||
|
||||
// GetLabels returns labels until the given deadline.
|
||||
func GetLabels(deadline Deadline) ([]string, error) {
|
||||
labels, err := vmstorage.SearchTagKeys(*maxTagKeysPerSearch)
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
labels, err := vmstorage.SearchTagKeys(*maxTagKeysPerSearch, deadline.deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error during labels search: %s", err)
|
||||
return nil, fmt.Errorf("error during labels search: %w", err)
|
||||
}
|
||||
|
||||
// Substitute "" with "__name__"
|
||||
@@ -417,14 +482,17 @@ func GetLabels(deadline Deadline) ([]string, error) {
|
||||
// GetLabelValues returns label values for the given labelName
|
||||
// until the given deadline.
|
||||
func GetLabelValues(labelName string, deadline Deadline) ([]string, error) {
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
if labelName == "__name__" {
|
||||
labelName = ""
|
||||
}
|
||||
|
||||
// Search for tag values
|
||||
labelValues, err := vmstorage.SearchTagValues([]byte(labelName), *maxTagValuesPerSearch)
|
||||
labelValues, err := vmstorage.SearchTagValues([]byte(labelName), *maxTagValuesPerSearch, deadline.deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error during label values search for labelName=%q: %s", labelName, err)
|
||||
return nil, fmt.Errorf("error during label values search for labelName=%q: %w", labelName, err)
|
||||
}
|
||||
|
||||
// Sort labelValues like Prometheus does
|
||||
@@ -435,9 +503,12 @@ func GetLabelValues(labelName string, deadline Deadline) ([]string, error) {
|
||||
|
||||
// GetLabelEntries returns all the label entries until the given deadline.
|
||||
func GetLabelEntries(deadline Deadline) ([]storage.TagEntry, error) {
|
||||
labelEntries, err := vmstorage.SearchTagEntries(*maxTagKeysPerSearch, *maxTagValuesPerSearch)
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
labelEntries, err := vmstorage.SearchTagEntries(*maxTagKeysPerSearch, *maxTagValuesPerSearch, deadline.deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error during label entries request: %s", err)
|
||||
return nil, fmt.Errorf("error during label entries request: %w", err)
|
||||
}
|
||||
|
||||
// Substitute "" with "__name__"
|
||||
@@ -462,18 +533,24 @@ func GetLabelEntries(deadline Deadline) ([]storage.TagEntry, error) {
|
||||
|
||||
// GetTSDBStatusForDate returns tsdb status according to https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
|
||||
func GetTSDBStatusForDate(deadline Deadline, date uint64, topN int) (*storage.TSDBStatus, error) {
|
||||
status, err := vmstorage.GetTSDBStatusForDate(date, topN)
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
status, err := vmstorage.GetTSDBStatusForDate(date, topN, deadline.deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error during tsdb status request: %s", err)
|
||||
return nil, fmt.Errorf("error during tsdb status request: %w", err)
|
||||
}
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// GetSeriesCount returns the number of unique series.
|
||||
func GetSeriesCount(deadline Deadline) (uint64, error) {
|
||||
n, err := vmstorage.GetSeriesCount()
|
||||
if deadline.Exceeded() {
|
||||
return 0, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
n, err := vmstorage.GetSeriesCount(deadline.deadline)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error during series count request: %s", err)
|
||||
return 0, fmt.Errorf("error during series count request: %w", err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
@@ -497,6 +574,10 @@ var ssPool sync.Pool
|
||||
//
|
||||
// Results.RunParallel or Results.Cancel must be called on the returned Results.
|
||||
func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadline) (*Results, error) {
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
|
||||
}
|
||||
|
||||
// Setup search.
|
||||
tfss, err := setupTfss(sq.TagFilterss)
|
||||
if err != nil {
|
||||
@@ -506,30 +587,42 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
|
||||
MinTimestamp: sq.MinTimestamp,
|
||||
MaxTimestamp: sq.MaxTimestamp,
|
||||
}
|
||||
if err := vmstorage.CheckTimeRange(tr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
vmstorage.WG.Add(1)
|
||||
defer vmstorage.WG.Done()
|
||||
|
||||
sr := getStorageSearch()
|
||||
sr.Init(vmstorage.Storage, tfss, tr, *maxMetricsPerSearch)
|
||||
maxSeriesCount := sr.Init(vmstorage.Storage, tfss, tr, *maxMetricsPerSearch, deadline.deadline)
|
||||
|
||||
m := make(map[string][]storage.BlockRef)
|
||||
var orderedMetricNames []string
|
||||
m := make(map[string][]storage.BlockRef, maxSeriesCount)
|
||||
orderedMetricNames := make([]string, 0, maxSeriesCount)
|
||||
blocksRead := 0
|
||||
for sr.NextMetricBlock() {
|
||||
blocksRead++
|
||||
if time.Until(deadline.Deadline) < 0 {
|
||||
if deadline.Exceeded() {
|
||||
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
|
||||
}
|
||||
metricName := sr.MetricBlockRef.MetricName
|
||||
brs := m[string(metricName)]
|
||||
if len(brs) == 0 {
|
||||
brs = append(brs, *sr.MetricBlockRef.BlockRef)
|
||||
if len(brs) > 1 {
|
||||
// An optimization: do not allocate a string for already existing metricName key in m
|
||||
m[string(metricName)] = brs
|
||||
} else {
|
||||
// An optimization for a big number of time series with long metricName values:
|
||||
// use only a single copy of metricName for both orderedMetricNames and m.
|
||||
orderedMetricNames = append(orderedMetricNames, string(metricName))
|
||||
m[orderedMetricNames[len(orderedMetricNames)-1]] = brs
|
||||
}
|
||||
m[string(metricName)] = append(brs, *sr.MetricBlockRef.BlockRef)
|
||||
}
|
||||
if err := sr.Error(); err != nil {
|
||||
return nil, fmt.Errorf("search error after reading %d data blocks: %s", blocksRead, err)
|
||||
if errors.Is(err, storage.ErrDeadlineExceeded) {
|
||||
return nil, fmt.Errorf("timeout exceeded during the query: %s", deadline.String())
|
||||
}
|
||||
return nil, fmt.Errorf("search error after reading %d data blocks: %w", blocksRead, err)
|
||||
}
|
||||
|
||||
var rss Results
|
||||
@@ -555,7 +648,7 @@ func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error)
|
||||
for i := range tagFilters {
|
||||
tf := &tagFilters[i]
|
||||
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %s", tf, err)
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
|
||||
}
|
||||
}
|
||||
tfss = append(tfss, tfs)
|
||||
@@ -566,7 +659,7 @@ func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error)
|
||||
|
||||
// Deadline contains deadline with the corresponding timeout for pretty error messages.
|
||||
type Deadline struct {
|
||||
Deadline time.Time
|
||||
deadline uint64
|
||||
|
||||
timeout time.Duration
|
||||
flagHint string
|
||||
@@ -576,14 +669,19 @@ type Deadline struct {
|
||||
//
|
||||
// flagHint must contain a hint for the command-line flag, which could be used
|
||||
// in order to increase timeout.
|
||||
func NewDeadline(timeout time.Duration, flagHint string) Deadline {
|
||||
func NewDeadline(startTime time.Time, timeout time.Duration, flagHint string) Deadline {
|
||||
return Deadline{
|
||||
Deadline: time.Now().Add(timeout),
|
||||
deadline: uint64(startTime.Add(timeout).Unix()),
|
||||
timeout: timeout,
|
||||
flagHint: flagHint,
|
||||
}
|
||||
}
|
||||
|
||||
// Exceeded returns true if deadline is exceeded.
|
||||
func (d *Deadline) Exceeded() bool {
|
||||
return fasttime.UnixTimestamp() > d.deadline
|
||||
}
|
||||
|
||||
// String returns human-readable string representation for d.
|
||||
func (d *Deadline) String() string {
|
||||
return fmt.Sprintf("%.3f seconds; the timeout can be adjusted with `%s` command-line flag", d.timeout.Seconds(), d.flagHint)
|
||||
|
||||
@@ -3,7 +3,6 @@ package prometheus
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"runtime"
|
||||
@@ -16,6 +15,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -44,9 +44,9 @@ const defaultStep = 5 * 60 * 1000
|
||||
|
||||
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
|
||||
func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
ct := currentTime()
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse request form values: %s", err)
|
||||
return fmt.Errorf("cannot parse request form values: %w", err)
|
||||
}
|
||||
matches := r.Form["match[]"]
|
||||
if len(matches) == 0 {
|
||||
@@ -67,7 +67,7 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
if start >= end {
|
||||
start = end - defaultStep
|
||||
}
|
||||
@@ -82,7 +82,7 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request
|
||||
}
|
||||
rss, err := netstorage.ProcessSearchQuery(sq, true, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
|
||||
return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||
}
|
||||
|
||||
resultsCh := make(chan *quicktemplate.ByteBuffer)
|
||||
@@ -105,7 +105,7 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request
|
||||
|
||||
err = <-doneCh
|
||||
if err != nil {
|
||||
return fmt.Errorf("error during data fetching: %s", err)
|
||||
return fmt.Errorf("error during data fetching: %w", err)
|
||||
}
|
||||
federateDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
@@ -115,9 +115,9 @@ var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/fe
|
||||
|
||||
// ExportHandler exports data in raw format from /api/v1/export.
|
||||
func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
ct := currentTime()
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse request form values: %s", err)
|
||||
return fmt.Errorf("cannot parse request form values: %w", err)
|
||||
}
|
||||
matches := r.Form["match[]"]
|
||||
if len(matches) == 0 {
|
||||
@@ -138,12 +138,12 @@ func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
format := r.FormValue("format")
|
||||
maxRowsPerLine := int(fastfloat.ParseInt64BestEffort(r.FormValue("max_rows_per_line")))
|
||||
deadline := getDeadlineForExport(r)
|
||||
deadline := getDeadlineForExport(r, startTime)
|
||||
if start >= end {
|
||||
end = start + defaultStep
|
||||
}
|
||||
if err := exportHandler(w, matches, start, end, format, maxRowsPerLine, deadline); err != nil {
|
||||
return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %s", matches, start, end, err)
|
||||
return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %w", matches, start, end, err)
|
||||
}
|
||||
exportDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
@@ -153,9 +153,13 @@ var exportDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
|
||||
|
||||
func exportHandler(w http.ResponseWriter, matches []string, start, end int64, format string, maxRowsPerLine int, deadline netstorage.Deadline) error {
|
||||
writeResponseFunc := WriteExportStdResponse
|
||||
writeLineFunc := WriteExportJSONLine
|
||||
writeLineFunc := func(rs *netstorage.Result, resultsCh chan<- *quicktemplate.ByteBuffer) {
|
||||
bb := quicktemplate.AcquireByteBuffer()
|
||||
WriteExportJSONLine(bb, rs)
|
||||
resultsCh <- bb
|
||||
}
|
||||
if maxRowsPerLine > 0 {
|
||||
writeLineFunc = func(w io.Writer, rs *netstorage.Result) {
|
||||
writeLineFunc = func(rs *netstorage.Result, resultsCh chan<- *quicktemplate.ByteBuffer) {
|
||||
valuesOrig := rs.Values
|
||||
timestampsOrig := rs.Timestamps
|
||||
values := valuesOrig
|
||||
@@ -176,7 +180,9 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
|
||||
}
|
||||
rs.Values = valuesChunk
|
||||
rs.Timestamps = timestampsChunk
|
||||
WriteExportJSONLine(w, rs)
|
||||
bb := quicktemplate.AcquireByteBuffer()
|
||||
WriteExportJSONLine(bb, rs)
|
||||
resultsCh <- bb
|
||||
}
|
||||
rs.Values = valuesOrig
|
||||
rs.Timestamps = timestampsOrig
|
||||
@@ -185,10 +191,18 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
|
||||
contentType := "application/stream+json"
|
||||
if format == "prometheus" {
|
||||
contentType = "text/plain"
|
||||
writeLineFunc = WriteExportPrometheusLine
|
||||
writeLineFunc = func(rs *netstorage.Result, resultsCh chan<- *quicktemplate.ByteBuffer) {
|
||||
bb := quicktemplate.AcquireByteBuffer()
|
||||
WriteExportPrometheusLine(bb, rs)
|
||||
resultsCh <- bb
|
||||
}
|
||||
} else if format == "promapi" {
|
||||
writeResponseFunc = WriteExportPromAPIResponse
|
||||
writeLineFunc = WriteExportPromAPILine
|
||||
writeLineFunc = func(rs *netstorage.Result, resultsCh chan<- *quicktemplate.ByteBuffer) {
|
||||
bb := quicktemplate.AcquireByteBuffer()
|
||||
WriteExportPromAPILine(bb, rs)
|
||||
resultsCh <- bb
|
||||
}
|
||||
}
|
||||
|
||||
tagFilterss, err := getTagFilterssFromMatches(matches)
|
||||
@@ -202,16 +216,14 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
|
||||
}
|
||||
rss, err := netstorage.ProcessSearchQuery(sq, true, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
|
||||
return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||
}
|
||||
|
||||
resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1))
|
||||
doneCh := make(chan error)
|
||||
go func() {
|
||||
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
|
||||
bb := quicktemplate.AcquireByteBuffer()
|
||||
writeLineFunc(bb, rs)
|
||||
resultsCh <- bb
|
||||
writeLineFunc(rs, resultsCh)
|
||||
})
|
||||
close(resultsCh)
|
||||
doneCh <- err
|
||||
@@ -227,7 +239,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
|
||||
}
|
||||
err = <-doneCh
|
||||
if err != nil {
|
||||
return fmt.Errorf("error during data fetching: %s", err)
|
||||
return fmt.Errorf("error during data fetching: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -237,7 +249,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
|
||||
func DeleteHandler(startTime time.Time, r *http.Request) error {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse request form values: %s", err)
|
||||
return fmt.Errorf("cannot parse request form values: %w", err)
|
||||
}
|
||||
if r.FormValue("start") != "" || r.FormValue("end") != "" {
|
||||
return fmt.Errorf("start and end aren't supported. Remove these args from the query in order to delete all the matching metrics")
|
||||
@@ -255,7 +267,7 @@ func DeleteHandler(startTime time.Time, r *http.Request) error {
|
||||
}
|
||||
deletedCount, err := netstorage.DeleteSeries(sq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot delete time series matching %q: %s", matches, err)
|
||||
return fmt.Errorf("cannot delete time series matching %q: %w", matches, err)
|
||||
}
|
||||
if deletedCount > 0 {
|
||||
promql.ResetRollupResultCache()
|
||||
@@ -270,17 +282,16 @@ var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
|
||||
func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
return fmt.Errorf("cannot parse form values: %w", err)
|
||||
}
|
||||
var labelValues []string
|
||||
if len(r.Form["match[]"]) == 0 && len(r.Form["start"]) == 0 && len(r.Form["end"]) == 0 {
|
||||
var err error
|
||||
labelValues, err = netstorage.GetLabelValues(labelName, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
|
||||
return fmt.Errorf(`cannot obtain label values for %q: %w`, labelName, err)
|
||||
}
|
||||
} else {
|
||||
// Extended functionality that allows filtering by label filters and time range
|
||||
@@ -291,7 +302,7 @@ func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWr
|
||||
if len(matches) == 0 {
|
||||
matches = []string{fmt.Sprintf("{%s!=''}", labelName)}
|
||||
}
|
||||
ct := currentTime()
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
end, err := getTime(r, "end", ct)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -302,7 +313,7 @@ func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWr
|
||||
}
|
||||
labelValues, err = labelValuesWithMatches(labelName, matches, start, end, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain label values for %q, match[]=%q, start=%d, end=%d: %s", labelName, matches, start, end, err)
|
||||
return fmt.Errorf("cannot obtain label values for %q, match[]=%q, start=%d, end=%d: %w", labelName, matches, start, end, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -343,7 +354,7 @@ func labelValuesWithMatches(labelName string, matches []string, start, end int64
|
||||
}
|
||||
rss, err := netstorage.ProcessSearchQuery(sq, false, deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot fetch data for %q: %s", sq, err)
|
||||
return nil, fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||
}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
@@ -358,7 +369,7 @@ func labelValuesWithMatches(labelName string, matches []string, start, end int64
|
||||
mLock.Unlock()
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when data fetching: %s", err)
|
||||
return nil, fmt.Errorf("error when data fetching: %w", err)
|
||||
}
|
||||
|
||||
labelValues := make([]string, 0, len(m))
|
||||
@@ -373,10 +384,10 @@ var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
|
||||
|
||||
// LabelsCountHandler processes /api/v1/labels/count request.
|
||||
func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
labelEntries, err := netstorage.GetLabelEntries(deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain label entries: %s`, err)
|
||||
return fmt.Errorf(`cannot obtain label entries: %w`, err)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteLabelsCountResponse(w, labelEntries)
|
||||
@@ -392,16 +403,16 @@ const secsPerDay = 3600 * 24
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
|
||||
func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
return fmt.Errorf("cannot parse form values: %w", err)
|
||||
}
|
||||
date := fasttime.UnixDate()
|
||||
dateStr := r.FormValue("date")
|
||||
if len(dateStr) > 0 {
|
||||
t, err := time.Parse("2006-01-02", dateStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `date` arg %q: %s", dateStr, err)
|
||||
return fmt.Errorf("cannot parse `date` arg %q: %w", dateStr, err)
|
||||
}
|
||||
date = uint64(t.Unix()) / secsPerDay
|
||||
}
|
||||
@@ -410,7 +421,7 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
if len(topNStr) > 0 {
|
||||
n, err := strconv.Atoi(topNStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `topN` arg %q: %s", topNStr, err)
|
||||
return fmt.Errorf("cannot parse `topN` arg %q: %w", topNStr, err)
|
||||
}
|
||||
if n <= 0 {
|
||||
n = 1
|
||||
@@ -422,7 +433,7 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
}
|
||||
status, err := netstorage.GetTSDBStatusForDate(deadline, date, topN)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %s`, date, topN, err)
|
||||
return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %w`, date, topN, err)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteTSDBStatusResponse(w, status)
|
||||
@@ -436,17 +447,16 @@ var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
|
||||
func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
return fmt.Errorf("cannot parse form values: %w", err)
|
||||
}
|
||||
var labels []string
|
||||
if len(r.Form["match[]"]) == 0 && len(r.Form["start"]) == 0 && len(r.Form["end"]) == 0 {
|
||||
var err error
|
||||
labels, err = netstorage.GetLabels(deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain labels: %s", err)
|
||||
return fmt.Errorf("cannot obtain labels: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Extended functionality that allows filtering by label filters and time range
|
||||
@@ -455,7 +465,7 @@ func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
if len(matches) == 0 {
|
||||
matches = []string{"{__name__!=''}"}
|
||||
}
|
||||
ct := currentTime()
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
end, err := getTime(r, "end", ct)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -466,7 +476,7 @@ func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
labels, err = labelsWithMatches(matches, start, end, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain labels for match[]=%q, start=%d, end=%d: %s", matches, start, end, err)
|
||||
return fmt.Errorf("cannot obtain labels for match[]=%q, start=%d, end=%d: %w", matches, start, end, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -494,7 +504,7 @@ func labelsWithMatches(matches []string, start, end int64, deadline netstorage.D
|
||||
}
|
||||
rss, err := netstorage.ProcessSearchQuery(sq, false, deadline)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot fetch data for %q: %s", sq, err)
|
||||
return nil, fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||
}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
@@ -510,7 +520,7 @@ func labelsWithMatches(matches []string, start, end int64, deadline netstorage.D
|
||||
mLock.Unlock()
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when data fetching: %s", err)
|
||||
return nil, fmt.Errorf("error when data fetching: %w", err)
|
||||
}
|
||||
|
||||
labels := make([]string, 0, len(m))
|
||||
@@ -525,10 +535,10 @@ var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
|
||||
|
||||
// SeriesCountHandler processes /api/v1/series/count request.
|
||||
func SeriesCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
n, err := netstorage.GetSeriesCount(deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain series count: %s", err)
|
||||
return fmt.Errorf("cannot obtain series count: %w", err)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteSeriesCountResponse(w, n)
|
||||
@@ -542,10 +552,9 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
|
||||
func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
ct := currentTime()
|
||||
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
return fmt.Errorf("cannot parse form values: %w", err)
|
||||
}
|
||||
matches := r.Form["match[]"]
|
||||
if len(matches) == 0 {
|
||||
@@ -564,7 +573,7 @@ func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
|
||||
tagFilterss, err := getTagFilterssFromMatches(matches)
|
||||
if err != nil {
|
||||
@@ -580,7 +589,7 @@ func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
rss, err := netstorage.ProcessSearchQuery(sq, false, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
|
||||
return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||
}
|
||||
|
||||
resultsCh := make(chan *quicktemplate.ByteBuffer)
|
||||
@@ -605,7 +614,7 @@ func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
err = <-doneCh
|
||||
if err != nil {
|
||||
return fmt.Errorf("error during data fetching: %s", err)
|
||||
return fmt.Errorf("error during data fetching: %w", err)
|
||||
}
|
||||
seriesDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
@@ -617,8 +626,7 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
|
||||
func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
ct := currentTime()
|
||||
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
query := r.FormValue("query")
|
||||
if len(query) == 0 {
|
||||
return fmt.Errorf("missing `query` arg")
|
||||
@@ -638,7 +646,7 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
|
||||
if step <= 0 {
|
||||
step = defaultStep
|
||||
}
|
||||
deadline := getDeadlineForQuery(r)
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
|
||||
if len(query) > *maxQueryLen {
|
||||
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
|
||||
@@ -652,17 +660,17 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
|
||||
if childQuery, windowStr, offsetStr := promql.IsMetricSelectorWithRollup(query); childQuery != "" {
|
||||
window, err := parsePositiveDuration(windowStr, step)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse window: %s", err)
|
||||
return fmt.Errorf("cannot parse window: %w", err)
|
||||
}
|
||||
offset, err := parseDuration(offsetStr, step)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse offset: %s", err)
|
||||
return fmt.Errorf("cannot parse offset: %w", err)
|
||||
}
|
||||
start -= offset
|
||||
end := start
|
||||
start = end - window
|
||||
if err := exportHandler(w, []string{childQuery}, start, end, "promapi", 0, deadline); err != nil {
|
||||
return fmt.Errorf("error when exporting data for query=%q on the time range (start=%d, end=%d): %s", childQuery, start, end, err)
|
||||
return fmt.Errorf("error when exporting data for query=%q on the time range (start=%d, end=%d): %w", childQuery, start, end, err)
|
||||
}
|
||||
queryDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
@@ -670,39 +678,40 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
|
||||
if childQuery, windowStr, stepStr, offsetStr := promql.IsRollup(query); childQuery != "" {
|
||||
newStep, err := parsePositiveDuration(stepStr, step)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse step: %s", err)
|
||||
return fmt.Errorf("cannot parse step: %w", err)
|
||||
}
|
||||
if newStep > 0 {
|
||||
step = newStep
|
||||
}
|
||||
window, err := parsePositiveDuration(windowStr, step)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse window: %s", err)
|
||||
return fmt.Errorf("cannot parse window: %w", err)
|
||||
}
|
||||
offset, err := parseDuration(offsetStr, step)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse offset: %s", err)
|
||||
return fmt.Errorf("cannot parse offset: %w", err)
|
||||
}
|
||||
start -= offset
|
||||
end := start
|
||||
start = end - window
|
||||
if err := queryRangeHandler(w, childQuery, start, end, step, r, ct); err != nil {
|
||||
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", childQuery, start, end, step, err)
|
||||
if err := queryRangeHandler(startTime, w, childQuery, start, end, step, r, ct); err != nil {
|
||||
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", childQuery, start, end, step, err)
|
||||
}
|
||||
queryDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
}
|
||||
|
||||
ec := promql.EvalConfig{
|
||||
Start: start,
|
||||
End: start,
|
||||
Step: step,
|
||||
Deadline: deadline,
|
||||
LookbackDelta: lookbackDelta,
|
||||
Start: start,
|
||||
End: start,
|
||||
Step: step,
|
||||
QuotedRemoteAddr: httpserver.GetQuotedRemoteAddr(r),
|
||||
Deadline: deadline,
|
||||
LookbackDelta: lookbackDelta,
|
||||
}
|
||||
result, err := promql.Exec(&ec, query, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error when executing query=%q for (time=%d, step=%d): %s", query, start, step, err)
|
||||
return fmt.Errorf("error when executing query=%q for (time=%d, step=%d): %w", query, start, step, err)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -731,8 +740,7 @@ func parsePositiveDuration(s string, step int64) (int64, error) {
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
|
||||
func QueryRangeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
ct := currentTime()
|
||||
|
||||
ct := startTime.UnixNano() / 1e6
|
||||
query := r.FormValue("query")
|
||||
if len(query) == 0 {
|
||||
return fmt.Errorf("missing `query` arg")
|
||||
@@ -749,15 +757,15 @@ func QueryRangeHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := queryRangeHandler(w, query, start, end, step, r, ct); err != nil {
|
||||
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", query, start, end, step, err)
|
||||
if err := queryRangeHandler(startTime, w, query, start, end, step, r, ct); err != nil {
|
||||
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", query, start, end, step, err)
|
||||
}
|
||||
queryRangeDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
}
|
||||
|
||||
func queryRangeHandler(w http.ResponseWriter, query string, start, end, step int64, r *http.Request, ct int64) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
func queryRangeHandler(startTime time.Time, w http.ResponseWriter, query string, start, end, step int64, r *http.Request, ct int64) error {
|
||||
deadline := getDeadlineForQuery(r, startTime)
|
||||
mayCache := !getBool(r, "nocache")
|
||||
lookbackDelta, err := getMaxLookback(r)
|
||||
if err != nil {
|
||||
@@ -779,32 +787,34 @@ func queryRangeHandler(w http.ResponseWriter, query string, start, end, step int
|
||||
}
|
||||
|
||||
ec := promql.EvalConfig{
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: step,
|
||||
Deadline: deadline,
|
||||
MayCache: mayCache,
|
||||
LookbackDelta: lookbackDelta,
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: step,
|
||||
QuotedRemoteAddr: httpserver.GetQuotedRemoteAddr(r),
|
||||
Deadline: deadline,
|
||||
MayCache: mayCache,
|
||||
LookbackDelta: lookbackDelta,
|
||||
}
|
||||
result, err := promql.Exec(&ec, query, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot execute query: %s", err)
|
||||
return fmt.Errorf("cannot execute query: %w", err)
|
||||
}
|
||||
queryOffset := getLatencyOffsetMilliseconds()
|
||||
if ct-end < queryOffset {
|
||||
result = adjustLastPoints(result)
|
||||
if ct-queryOffset < end {
|
||||
result = adjustLastPoints(result, ct-queryOffset, ct+step)
|
||||
}
|
||||
|
||||
// Remove NaN values as Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
|
||||
removeNaNValuesInplace(result)
|
||||
result = removeEmptyValuesAndTimeseries(result)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteQueryRangeResponse(w, result)
|
||||
return nil
|
||||
}
|
||||
|
||||
func removeNaNValuesInplace(tss []netstorage.Result) {
|
||||
func removeEmptyValuesAndTimeseries(tss []netstorage.Result) []netstorage.Result {
|
||||
dst := tss[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
hasNaNs := false
|
||||
@@ -816,6 +826,9 @@ func removeNaNValuesInplace(tss []netstorage.Result) {
|
||||
}
|
||||
if !hasNaNs {
|
||||
// Fast path: nothing to remove.
|
||||
if len(ts.Values) > 0 {
|
||||
dst = append(dst, *ts)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -832,45 +845,42 @@ func removeNaNValuesInplace(tss []netstorage.Result) {
|
||||
}
|
||||
ts.Values = dstValues
|
||||
ts.Timestamps = dstTimestamps
|
||||
if len(ts.Values) > 0 {
|
||||
dst = append(dst, *ts)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
var queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)
|
||||
|
||||
// adjustLastPoints substitutes the last point values with the previous
|
||||
// point values, since the last points may contain garbage.
|
||||
func adjustLastPoints(tss []netstorage.Result) []netstorage.Result {
|
||||
if len(tss) == 0 {
|
||||
return nil
|
||||
}
|
||||
var nan = math.NaN()
|
||||
|
||||
// Search for the last non-NaN value across all the timeseries.
|
||||
lastNonNaNIdx := -1
|
||||
// adjustLastPoints substitutes the last point values on the time range (start..end]
|
||||
// with the previous point values, since these points may contain incomplete values.
|
||||
func adjustLastPoints(tss []netstorage.Result, start, end int64) []netstorage.Result {
|
||||
for i := range tss {
|
||||
values := tss[i].Values
|
||||
j := len(values) - 1
|
||||
for j >= 0 && math.IsNaN(values[j]) {
|
||||
ts := &tss[i]
|
||||
values := ts.Values
|
||||
timestamps := ts.Timestamps
|
||||
j := len(timestamps) - 1
|
||||
if j >= 0 && timestamps[j] > end {
|
||||
// It looks like the `offset` is used in the query, which shifts time range beyond the `end`.
|
||||
// Leave such a time series as is, since it is unclear which points may be incomplete in it.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/625
|
||||
continue
|
||||
}
|
||||
for j >= 0 && timestamps[j] > start {
|
||||
j--
|
||||
}
|
||||
if j > lastNonNaNIdx {
|
||||
lastNonNaNIdx = j
|
||||
j++
|
||||
lastValue := nan
|
||||
if j > 0 {
|
||||
lastValue = values[j-1]
|
||||
}
|
||||
}
|
||||
if lastNonNaNIdx == -1 {
|
||||
// All timeseries contain only NaNs.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Substitute the last two values starting from lastNonNaNIdx
|
||||
// with the previous values for each timeseries.
|
||||
for i := range tss {
|
||||
values := tss[i].Values
|
||||
for j := 0; j < 2; j++ {
|
||||
idx := lastNonNaNIdx + j
|
||||
if idx <= 0 || idx >= len(values) || math.IsNaN(values[idx-1]) {
|
||||
continue
|
||||
}
|
||||
values[idx] = values[idx-1]
|
||||
for j < len(timestamps) && timestamps[j] <= end {
|
||||
values[j] = lastValue
|
||||
j++
|
||||
}
|
||||
}
|
||||
return tss
|
||||
@@ -895,14 +905,14 @@ func getTime(r *http.Request, argKey string, defaultValue int64) (int64, error)
|
||||
return maxTimeMsecs, nil
|
||||
}
|
||||
// Try parsing duration relative to the current time
|
||||
d, err1 := time.ParseDuration(argValue)
|
||||
d, err1 := metricsql.DurationValue(argValue, 0)
|
||||
if err1 != nil {
|
||||
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
|
||||
return 0, fmt.Errorf("cannot parse %q=%q: %w", argKey, argValue, err)
|
||||
}
|
||||
if d > 0 {
|
||||
d = -d
|
||||
}
|
||||
t = time.Now().Add(d)
|
||||
t = time.Now().Add(time.Duration(d) * time.Millisecond)
|
||||
}
|
||||
secs = float64(t.UnixNano()) / 1e9
|
||||
}
|
||||
@@ -937,11 +947,11 @@ func getDuration(r *http.Request, argKey string, defaultValue int64) (int64, err
|
||||
secs, err := strconv.ParseFloat(argValue, 64)
|
||||
if err != nil {
|
||||
// Try parsing string format
|
||||
d, err := time.ParseDuration(argValue)
|
||||
d, err := metricsql.DurationValue(argValue, 0)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
|
||||
return 0, fmt.Errorf("cannot parse %q=%q: %w", argKey, argValue, err)
|
||||
}
|
||||
secs = d.Seconds()
|
||||
secs = float64(d) / 1000
|
||||
}
|
||||
msecs := int64(secs * 1e3)
|
||||
if msecs <= 0 || msecs > maxDurationMsecs {
|
||||
@@ -960,17 +970,17 @@ func getMaxLookback(r *http.Request) (int64, error) {
|
||||
return getDuration(r, "max_lookback", d)
|
||||
}
|
||||
|
||||
func getDeadlineForQuery(r *http.Request) netstorage.Deadline {
|
||||
func getDeadlineForQuery(r *http.Request, startTime time.Time) netstorage.Deadline {
|
||||
dMax := maxQueryDuration.Milliseconds()
|
||||
return getDeadlineWithMaxDuration(r, dMax, "-search.maxQueryDuration")
|
||||
return getDeadlineWithMaxDuration(r, startTime, dMax, "-search.maxQueryDuration")
|
||||
}
|
||||
|
||||
func getDeadlineForExport(r *http.Request) netstorage.Deadline {
|
||||
func getDeadlineForExport(r *http.Request, startTime time.Time) netstorage.Deadline {
|
||||
dMax := maxExportDuration.Milliseconds()
|
||||
return getDeadlineWithMaxDuration(r, dMax, "-search.maxExportDuration")
|
||||
return getDeadlineWithMaxDuration(r, startTime, dMax, "-search.maxExportDuration")
|
||||
}
|
||||
|
||||
func getDeadlineWithMaxDuration(r *http.Request, dMax int64, flagHint string) netstorage.Deadline {
|
||||
func getDeadlineWithMaxDuration(r *http.Request, startTime time.Time, dMax int64, flagHint string) netstorage.Deadline {
|
||||
d, err := getDuration(r, "timeout", 0)
|
||||
if err != nil {
|
||||
d = 0
|
||||
@@ -979,7 +989,7 @@ func getDeadlineWithMaxDuration(r *http.Request, dMax int64, flagHint string) ne
|
||||
d = dMax
|
||||
}
|
||||
timeout := time.Duration(d) * time.Millisecond
|
||||
return netstorage.NewDeadline(timeout, flagHint)
|
||||
return netstorage.NewDeadline(startTime, timeout, flagHint)
|
||||
}
|
||||
|
||||
func getBool(r *http.Request, argKey string) bool {
|
||||
@@ -992,16 +1002,12 @@ func getBool(r *http.Request, argKey string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func currentTime() int64 {
|
||||
return int64(fasttime.UnixTimestamp() * 1000)
|
||||
}
|
||||
|
||||
func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error) {
|
||||
tagFilterss := make([][]storage.TagFilter, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
tagFilters, err := promql.ParseMetricSelector(match)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse %q: %s", match, err)
|
||||
return nil, fmt.Errorf("cannot parse %q: %w", match, err)
|
||||
}
|
||||
tagFilterss = append(tagFilterss, tagFilters)
|
||||
}
|
||||
|
||||
@@ -11,17 +11,15 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
)
|
||||
|
||||
func TestRemoveNaNValuesInplace(t *testing.T) {
|
||||
func TestRemoveEmptyValuesAndTimeseries(t *testing.T) {
|
||||
f := func(tss []netstorage.Result, tssExpected []netstorage.Result) {
|
||||
t.Helper()
|
||||
removeNaNValuesInplace(tss)
|
||||
tss = removeEmptyValuesAndTimeseries(tss)
|
||||
if !reflect.DeepEqual(tss, tssExpected) {
|
||||
t.Fatalf("unexpected result; got %v; want %v", tss, tssExpected)
|
||||
}
|
||||
}
|
||||
|
||||
nan := math.NaN()
|
||||
|
||||
f(nil, nil)
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
@@ -32,6 +30,14 @@ func TestRemoveNaNValuesInplace(t *testing.T) {
|
||||
Timestamps: []int64{100, 200, 300, 400},
|
||||
Values: []float64{nan, nan, 3, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: nil,
|
||||
Values: nil,
|
||||
},
|
||||
}, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300},
|
||||
@@ -113,3 +119,152 @@ func TestGetTimeError(t *testing.T) {
|
||||
f("-292273086-05-16T16:47:07Z")
|
||||
f("292277025-08-18T07:12:54.999999998Z")
|
||||
}
|
||||
|
||||
func TestAdjustLastPoints(t *testing.T) {
|
||||
f := func(tss []netstorage.Result, start, end int64, tssExpected []netstorage.Result) {
|
||||
t.Helper()
|
||||
tss = adjustLastPoints(tss, start, end)
|
||||
for i, ts := range tss {
|
||||
for j, value := range ts.Values {
|
||||
expectedValue := tssExpected[i].Values[j]
|
||||
if math.IsNaN(expectedValue) {
|
||||
if !math.IsNaN(value) {
|
||||
t.Fatalf("unexpected value for time series #%d at position %d; got %v; want nan", i, j, value)
|
||||
}
|
||||
} else if expectedValue != value {
|
||||
t.Fatalf("unexpected value for time series #%d at position %d; got %v; want %v", i, j, value, expectedValue)
|
||||
}
|
||||
}
|
||||
if !reflect.DeepEqual(ts.Timestamps, tssExpected[i].Timestamps) {
|
||||
t.Fatalf("unexpected timestamps for time series #%d; got %v; want %v", i, tss, tssExpected)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
nan := math.NaN()
|
||||
|
||||
f(nil, 300, 500, nil)
|
||||
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 4, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
}, 400, 500, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 4, 4},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
})
|
||||
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, nan, nan, nan},
|
||||
},
|
||||
}, 300, 500, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 3, 3},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, nan, nan, nan},
|
||||
},
|
||||
})
|
||||
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, nan, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, nan, nan, nan, nan},
|
||||
},
|
||||
}, 500, 300, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, nan, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, nan, nan, nan, nan},
|
||||
},
|
||||
})
|
||||
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 4, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400},
|
||||
Values: []float64{1, 2, 3, 4},
|
||||
},
|
||||
}, 400, 500, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 4, 4},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400},
|
||||
Values: []float64{1, 2, 3, 4},
|
||||
},
|
||||
})
|
||||
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300},
|
||||
Values: []float64{1, 2, nan},
|
||||
},
|
||||
}, 300, 600, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, 3, 3},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300},
|
||||
Values: []float64{1, 2, nan},
|
||||
},
|
||||
})
|
||||
|
||||
// Check for timestamps outside the configured time range.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/625
|
||||
f([]netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300},
|
||||
Values: []float64{1, 2, 45},
|
||||
},
|
||||
}, 250, 400, []netstorage.Result{
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300, 400, 500},
|
||||
Values: []float64{1, 2, 3, nan, nan},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{100, 200, 300},
|
||||
Values: []float64{1, 2, 2},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -21,10 +21,11 @@
|
||||
{% endfunc %}
|
||||
|
||||
{% func valuesWithTimestamps(values []float64, timestamps []int64) %}
|
||||
[
|
||||
{% if len(values) == 0 %}
|
||||
[]
|
||||
{% return %}
|
||||
{% endif %}
|
||||
[
|
||||
{% code /* inline metricRow call here for the sake of performance optimization */ %}
|
||||
[{%f= float64(timestamps[0])/1e3 %},"{%f= values[0] %}"]
|
||||
{% code
|
||||
|
||||
@@ -133,83 +133,85 @@ func metricRow(timestamp int64, value float64) string {
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:23
|
||||
func streamvaluesWithTimestamps(qw422016 *qt422016.Writer, values []float64, timestamps []int64) {
|
||||
//line app/vmselect/prometheus/util.qtpl:23
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/util.qtpl:25
|
||||
//line app/vmselect/prometheus/util.qtpl:24
|
||||
if len(values) == 0 {
|
||||
//line app/vmselect/prometheus/util.qtpl:24
|
||||
qw422016.N().S(`[]`)
|
||||
//line app/vmselect/prometheus/util.qtpl:26
|
||||
return
|
||||
//line app/vmselect/prometheus/util.qtpl:27
|
||||
}
|
||||
//line app/vmselect/prometheus/util.qtpl:28
|
||||
/* inline metricRow call here for the sake of performance optimization */
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:28
|
||||
//line app/vmselect/prometheus/util.qtpl:27
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/util.qtpl:29
|
||||
/* inline metricRow call here for the sake of performance optimization */
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:29
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/util.qtpl:30
|
||||
qw422016.N().F(float64(timestamps[0]) / 1e3)
|
||||
//line app/vmselect/prometheus/util.qtpl:29
|
||||
//line app/vmselect/prometheus/util.qtpl:30
|
||||
qw422016.N().S(`,"`)
|
||||
//line app/vmselect/prometheus/util.qtpl:29
|
||||
//line app/vmselect/prometheus/util.qtpl:30
|
||||
qw422016.N().F(values[0])
|
||||
//line app/vmselect/prometheus/util.qtpl:29
|
||||
//line app/vmselect/prometheus/util.qtpl:30
|
||||
qw422016.N().S(`"]`)
|
||||
//line app/vmselect/prometheus/util.qtpl:31
|
||||
//line app/vmselect/prometheus/util.qtpl:32
|
||||
timestamps = timestamps[1:]
|
||||
values = values[1:]
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:34
|
||||
//line app/vmselect/prometheus/util.qtpl:35
|
||||
if len(values) > 0 {
|
||||
//line app/vmselect/prometheus/util.qtpl:36
|
||||
//line app/vmselect/prometheus/util.qtpl:37
|
||||
// Remove bounds check inside the loop below
|
||||
_ = timestamps[len(values)-1]
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:39
|
||||
for i, v := range values {
|
||||
//line app/vmselect/prometheus/util.qtpl:40
|
||||
for i, v := range values {
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
/* inline metricRow call here for the sake of performance optimization */
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:40
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
qw422016.N().S(`,[`)
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
qw422016.N().F(float64(timestamps[i]) / 1e3)
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
qw422016.N().S(`,"`)
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
qw422016.N().F(v)
|
||||
//line app/vmselect/prometheus/util.qtpl:41
|
||||
qw422016.N().S(`"]`)
|
||||
//line app/vmselect/prometheus/util.qtpl:42
|
||||
qw422016.N().F(float64(timestamps[i]) / 1e3)
|
||||
//line app/vmselect/prometheus/util.qtpl:42
|
||||
qw422016.N().S(`,"`)
|
||||
//line app/vmselect/prometheus/util.qtpl:42
|
||||
qw422016.N().F(v)
|
||||
//line app/vmselect/prometheus/util.qtpl:42
|
||||
qw422016.N().S(`"]`)
|
||||
//line app/vmselect/prometheus/util.qtpl:43
|
||||
}
|
||||
//line app/vmselect/prometheus/util.qtpl:43
|
||||
//line app/vmselect/prometheus/util.qtpl:44
|
||||
}
|
||||
//line app/vmselect/prometheus/util.qtpl:43
|
||||
//line app/vmselect/prometheus/util.qtpl:44
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
func writevaluesWithTimestamps(qq422016 qtio422016.Writer, values []float64, timestamps []int64) {
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
streamvaluesWithTimestamps(qw422016, values, timestamps)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
func valuesWithTimestamps(values []float64, timestamps []int64) string {
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
writevaluesWithTimestamps(qb422016, values, timestamps)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/util.qtpl:45
|
||||
//line app/vmselect/prometheus/util.qtpl:46
|
||||
}
|
||||
|
||||
83
app/vmselect/promql/active_queries.go
Normal file
83
app/vmselect/promql/active_queries.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package promql
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// WriteActiveQueries writes active queries to w.
|
||||
//
|
||||
// The written active queries are sorted in descending order of their exeuction duration.
|
||||
func WriteActiveQueries(w io.Writer) {
|
||||
aqes := activeQueriesV.GetAll()
|
||||
sort.Slice(aqes, func(i, j int) bool {
|
||||
return aqes[i].startTime.Sub(aqes[j].startTime) < 0
|
||||
})
|
||||
now := time.Now()
|
||||
for _, aqe := range aqes {
|
||||
d := now.Sub(aqe.startTime)
|
||||
fmt.Fprintf(w, "\tduration: %.3fs, id=%016X, remote_addr=%s, query=%q, start=%d, end=%d, step=%d\n",
|
||||
d.Seconds(), aqe.qid, aqe.quotedRemoteAddr, aqe.q, aqe.start, aqe.end, aqe.step)
|
||||
}
|
||||
}
|
||||
|
||||
var activeQueriesV = newActiveQueries()
|
||||
|
||||
type activeQueries struct {
|
||||
mu sync.Mutex
|
||||
m map[uint64]activeQueryEntry
|
||||
}
|
||||
|
||||
type activeQueryEntry struct {
|
||||
start int64
|
||||
end int64
|
||||
step int64
|
||||
qid uint64
|
||||
quotedRemoteAddr string
|
||||
q string
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
func newActiveQueries() *activeQueries {
|
||||
return &activeQueries{
|
||||
m: make(map[uint64]activeQueryEntry),
|
||||
}
|
||||
}
|
||||
|
||||
func (aq *activeQueries) Add(ec *EvalConfig, q string) uint64 {
|
||||
var aqe activeQueryEntry
|
||||
aqe.start = ec.Start
|
||||
aqe.end = ec.End
|
||||
aqe.step = ec.Step
|
||||
aqe.qid = atomic.AddUint64(&nextActiveQueryID, 1)
|
||||
aqe.quotedRemoteAddr = ec.QuotedRemoteAddr
|
||||
aqe.q = q
|
||||
aqe.startTime = time.Now()
|
||||
|
||||
aq.mu.Lock()
|
||||
aq.m[aqe.qid] = aqe
|
||||
aq.mu.Unlock()
|
||||
return aqe.qid
|
||||
}
|
||||
|
||||
func (aq *activeQueries) Remove(qid uint64) {
|
||||
aq.mu.Lock()
|
||||
delete(aq.m, qid)
|
||||
aq.mu.Unlock()
|
||||
}
|
||||
|
||||
func (aq *activeQueries) GetAll() []activeQueryEntry {
|
||||
aq.mu.Lock()
|
||||
aqes := make([]activeQueryEntry, 0, len(aq.m))
|
||||
for _, aqe := range aq.m {
|
||||
aqes = append(aqes, aqe)
|
||||
}
|
||||
aq.mu.Unlock()
|
||||
return aqes
|
||||
}
|
||||
|
||||
// nextActiveQueryID is the counter activeQueries.Add atomically increments for generating query ids.
//
// It is seeded with the current Unix time in nanoseconds at package initialization.
var nextActiveQueryID = uint64(time.Now().UnixNano())
|
||||
@@ -27,6 +27,7 @@ var aggrFuncs = map[string]aggrFunc{
|
||||
"bottomk": newAggrFuncTopK(true),
|
||||
"topk": newAggrFuncTopK(false),
|
||||
"quantile": aggrFuncQuantile,
|
||||
"group": newAggrFunc(aggrFuncGroup),
|
||||
|
||||
// PromQL extension funcs
|
||||
"median": aggrFuncMedian,
|
||||
@@ -43,8 +44,10 @@ var aggrFuncs = map[string]aggrFunc{
|
||||
"bottomk_max": newAggrFuncRangeTopK(maxValue, true),
|
||||
"bottomk_avg": newAggrFuncRangeTopK(avgValue, true),
|
||||
"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
|
||||
"any": newAggrFunc(aggrFuncAny),
|
||||
"any": aggrFuncAny,
|
||||
"outliersk": aggrFuncOutliersK,
|
||||
"mode": newAggrFunc(aggrFuncMode),
|
||||
"zscore": aggrFuncZScore,
|
||||
}
|
||||
|
||||
type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
|
||||
@@ -62,14 +65,25 @@ func getAggrFunc(s string) aggrFunc {
|
||||
|
||||
func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
|
||||
return func(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
tss, err := getAggrTimeseries(afa.args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
return aggrFuncExt(afe, tss, &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
}
|
||||
|
||||
func getAggrTimeseries(args [][]*timeseries) ([]*timeseries, error) {
|
||||
if len(args) == 0 {
|
||||
return nil, fmt.Errorf("expecting at least one arg")
|
||||
}
|
||||
tss := args[0]
|
||||
for _, arg := range args[1:] {
|
||||
tss = append(tss, arg...)
|
||||
}
|
||||
return tss, nil
|
||||
}
|
||||
|
||||
func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.ModifierExpr) {
|
||||
groupOp := strings.ToLower(modifier.Op)
|
||||
switch groupOp {
|
||||
@@ -77,13 +91,15 @@ func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.Modifie
|
||||
metricName.RemoveTagsOn(modifier.Args)
|
||||
case "without":
|
||||
metricName.RemoveTagsIgnoring(modifier.Args)
|
||||
// Reset metric group as Prometheus does on `aggr(...) without (...)` call.
|
||||
metricName.ResetMetricGroup()
|
||||
default:
|
||||
logger.Panicf("BUG: unknown group modifier: %q", groupOp)
|
||||
}
|
||||
}
|
||||
|
||||
func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, maxSeries int, keepOriginal bool) ([]*timeseries, error) {
|
||||
arg := copyTimeseriesMetricNames(argOrig)
|
||||
arg := copyTimeseriesMetricNames(argOrig, keepOriginal)
|
||||
|
||||
// Perform grouping.
|
||||
m := make(map[string][]*timeseries)
|
||||
@@ -120,7 +136,35 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func aggrFuncAny(tss []*timeseries) []*timeseries {
|
||||
func aggrFuncAny(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
tss, err := getAggrTimeseries(afa.args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
afe := func(tss []*timeseries) []*timeseries {
|
||||
return tss[:1]
|
||||
}
|
||||
limit := afa.ae.Limit
|
||||
if limit > 1 {
|
||||
// Only a single time series per group must be returned
|
||||
limit = 1
|
||||
}
|
||||
return aggrFuncExt(afe, tss, &afa.ae.Modifier, limit, true)
|
||||
}
|
||||
|
||||
func aggrFuncGroup(tss []*timeseries) []*timeseries {
|
||||
// See https://github.com/prometheus/prometheus/commit/72425d4e3d14d209cc3f3f6e10e3240411303399
|
||||
dst := tss[0]
|
||||
for i := range dst.Values {
|
||||
v := nan
|
||||
for _, ts := range tss {
|
||||
if math.IsNaN(ts.Values[i]) {
|
||||
continue
|
||||
}
|
||||
v = 1
|
||||
}
|
||||
dst.Values[i] = v
|
||||
}
|
||||
return tss[:1]
|
||||
}
|
||||
|
||||
@@ -323,9 +367,7 @@ func aggrFuncStdvar(tss []*timeseries) []*timeseries {
|
||||
dst := tss[0]
|
||||
for i := range dst.Values {
|
||||
// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation
|
||||
var avg float64
|
||||
var count float64
|
||||
var q float64
|
||||
var avg, count, q float64
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
if math.IsNaN(v) {
|
||||
@@ -386,6 +428,98 @@ func aggrFuncDistinct(tss []*timeseries) []*timeseries {
|
||||
return tss[:1]
|
||||
}
|
||||
|
||||
func aggrFuncMode(tss []*timeseries) []*timeseries {
|
||||
dst := tss[0]
|
||||
a := make([]float64, 0, len(tss))
|
||||
for i := range dst.Values {
|
||||
a := a[:0]
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
if !math.IsNaN(v) {
|
||||
a = append(a, v)
|
||||
}
|
||||
}
|
||||
dst.Values[i] = modeNoNaNs(nan, a)
|
||||
}
|
||||
return tss[:1]
|
||||
}
|
||||
|
||||
func aggrFuncZScore(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
tss, err := getAggrTimeseries(afa.args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
afe := func(tss []*timeseries) []*timeseries {
|
||||
for i := range tss[0].Values {
|
||||
// Calculate avg and stddev for tss points at position i.
|
||||
// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation
|
||||
var avg, count, q float64
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
avgNew := avg + (v-avg)/count
|
||||
q += (v - avg) * (v - avgNew)
|
||||
avg = avgNew
|
||||
}
|
||||
if count == 0 {
|
||||
// Cannot calculate z-score for NaN points.
|
||||
continue
|
||||
}
|
||||
|
||||
// Calculate z-score for tss points at position i.
|
||||
// See https://en.wikipedia.org/wiki/Standard_score
|
||||
stddev := math.Sqrt(q / count)
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
ts.Values[i] = (v - avg) / stddev
|
||||
}
|
||||
}
|
||||
|
||||
// Remove MetricGroup from all the tss.
|
||||
for _, ts := range tss {
|
||||
ts.MetricName.ResetMetricGroup()
|
||||
}
|
||||
return tss
|
||||
}
|
||||
return aggrFuncExt(afe, tss, &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
|
||||
// modeNoNaNs returns the mode (the most frequent value) for a.
//
// prevValue is returned if a is empty. Otherwise prevValue takes part
// in the calculation as a candidate preceding the sorted items of a,
// unless it is NaN.
//
// It is expected that a doesn't contain NaNs. a is sorted in place.
//
// Ties are resolved in favor of the candidate seen first during the scan
// over the sorted a.
//
// See https://en.wikipedia.org/wiki/Mode_(statistics)
func modeNoNaNs(prevValue float64, a []float64) float64 {
	if len(a) == 0 {
		return prevValue
	}
	sort.Float64s(a)

	best := prevValue // the best mode candidate found so far
	bestLen := 0      // the length of the run for the best candidate
	runStart := -1    // the index where the value has changed the last time
	cur := prevValue  // the value of the current run
	for idx, v := range a {
		if v == cur {
			continue
		}
		// The run of cur is over. Prefer it if it is longer than the best run
		// or if there is no valid candidate yet.
		runLen := idx - runStart
		if runLen > bestLen || math.IsNaN(best) {
			bestLen, best = runLen, cur
		}
		runStart, cur = idx, v
	}
	// Take into account the trailing run.
	if tail := len(a) - runStart; tail > bestLen || math.IsNaN(best) {
		best = cur
	}
	return best
}
|
||||
|
||||
func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
@@ -688,13 +822,13 @@ func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
|
||||
func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
tss, err := getAggrTimeseries(afa.args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
phis := evalNumber(afa.ec, 0.5)[0].Values
|
||||
afe := newAggrQuantileFunc(phis)
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
return aggrFuncExt(afe, tss, &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
|
||||
func newAggrQuantileFunc(phis []float64) func(tss []*timeseries) []*timeseries {
|
||||
|
||||
@@ -52,6 +52,13 @@ var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
|
||||
updateAggrFunc: updateAggrAny,
|
||||
mergeAggrFunc: mergeAggrAny,
|
||||
finalizeAggrFunc: finalizeAggrCommon,
|
||||
|
||||
keepOriginal: true,
|
||||
},
|
||||
"group": {
|
||||
updateAggrFunc: updateAggrCount,
|
||||
mergeAggrFunc: mergeAggrCount,
|
||||
finalizeAggrFunc: finalizeAggrGroup,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -72,7 +79,7 @@ func newIncrementalAggrFuncContext(ae *metricsql.AggrFuncExpr, callbacks *increm
|
||||
}
|
||||
}
|
||||
|
||||
func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerID uint) {
|
||||
func (iafc *incrementalAggrFuncContext) updateTimeseries(tsOrig *timeseries, workerID uint) {
|
||||
iafc.mLock.Lock()
|
||||
m := iafc.m[workerID]
|
||||
if m == nil {
|
||||
@@ -81,6 +88,13 @@ func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerI
|
||||
}
|
||||
iafc.mLock.Unlock()
|
||||
|
||||
ts := tsOrig
|
||||
keepOriginal := iafc.callbacks.keepOriginal
|
||||
if keepOriginal {
|
||||
var dst timeseries
|
||||
dst.CopyFromMetricNames(tsOrig)
|
||||
ts = &dst
|
||||
}
|
||||
removeGroupTags(&ts.MetricName, &iafc.ae.Modifier)
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
@@ -95,6 +109,9 @@ func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerI
|
||||
Timestamps: ts.Timestamps,
|
||||
denyReuse: true,
|
||||
}
|
||||
if keepOriginal {
|
||||
ts = tsOrig
|
||||
}
|
||||
tsAggr.MetricName.CopyFrom(&ts.MetricName)
|
||||
iac = &incrementalAggrContext{
|
||||
ts: tsAggr,
|
||||
@@ -138,6 +155,9 @@ type incrementalAggrFuncCallbacks struct {
|
||||
updateAggrFunc func(iac *incrementalAggrContext, values []float64)
|
||||
mergeAggrFunc func(dst, src *incrementalAggrContext)
|
||||
finalizeAggrFunc func(iac *incrementalAggrContext)
|
||||
|
||||
// Whether to keep the original MetricName for every time series during aggregation
|
||||
keepOriginal bool
|
||||
}
|
||||
|
||||
func getIncrementalAggrFuncCallbacks(name string) *incrementalAggrFuncCallbacks {
|
||||
@@ -371,6 +391,17 @@ func finalizeAggrCount(iac *incrementalAggrContext) {
|
||||
}
|
||||
}
|
||||
|
||||
func finalizeAggrGroup(iac *incrementalAggrContext) {
|
||||
dstValues := iac.ts.Values
|
||||
for i, v := range dstValues {
|
||||
if v == 0 {
|
||||
dstValues[i] = nan
|
||||
} else {
|
||||
dstValues[i] = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func updateAggrSum2(iac *incrementalAggrContext, values []float64) {
|
||||
dstValues := iac.ts.Values
|
||||
dstCounts := iac.values
|
||||
|
||||
@@ -119,7 +119,7 @@ func testIncrementalParallelAggr(iafc *incrementalAggrFuncContext, tssSrc, tssEx
|
||||
wg.Wait()
|
||||
tssActual := iafc.finalizeTimeseries()
|
||||
if err := expectTimeseriesEqual(tssActual, tssExpected); err != nil {
|
||||
return fmt.Errorf("%s; tssActual=%v, tssExpected=%v", err, tssActual, tssExpected)
|
||||
return fmt.Errorf("%w; tssActual=%v, tssExpected=%v", err, tssActual, tssExpected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -164,7 +164,7 @@ func expectTsEqual(actual, expected *timeseries) error {
|
||||
return fmt.Errorf("unexpected timestamps; got %v; want %v", actual.Timestamps, expected.Timestamps)
|
||||
}
|
||||
if err := compareValues(actual.Values, expected.Values); err != nil {
|
||||
return fmt.Errorf("%s; actual %v; expected %v", err, actual.Values, expected.Values)
|
||||
return fmt.Errorf("%w; actual %v; expected %v", err, actual.Values, expected.Values)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
36
app/vmselect/promql/aggr_test.go
Normal file
36
app/vmselect/promql/aggr_test.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package promql
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestModeNoNaNs(t *testing.T) {
|
||||
f := func(prevValue float64, a []float64, expectedResult float64) {
|
||||
t.Helper()
|
||||
result := modeNoNaNs(prevValue, a)
|
||||
if math.IsNaN(result) {
|
||||
if !math.IsNaN(expectedResult) {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, expectedResult)
|
||||
}
|
||||
return
|
||||
}
|
||||
if result != expectedResult {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, expectedResult)
|
||||
}
|
||||
}
|
||||
f(nan, nil, nan)
|
||||
f(nan, []float64{123}, 123)
|
||||
f(nan, []float64{1, 2, 3}, 1)
|
||||
f(nan, []float64{1, 2, 2}, 2)
|
||||
f(nan, []float64{1, 1, 2}, 1)
|
||||
f(nan, []float64{1, 1, 1}, 1)
|
||||
f(nan, []float64{1, 2, 2, 3}, 2)
|
||||
f(nan, []float64{1, 1, 2, 2, 3, 3, 3}, 3)
|
||||
f(1, []float64{2, 3, 4, 5}, 1)
|
||||
f(1, []float64{2, 2}, 2)
|
||||
f(1, []float64{2, 3, 3}, 3)
|
||||
f(1, []float64{2, 4, 3, 4, 3, 4}, 4)
|
||||
f(1, []float64{2, 3, 3, 4, 4}, 3)
|
||||
f(1, []float64{4, 3, 2, 3, 4}, 3)
|
||||
}
|
||||
@@ -290,12 +290,14 @@ func binaryOpAnd(bfa *binaryOpFuncArg) ([]*timeseries, error) {
|
||||
if tssLeft == nil {
|
||||
continue
|
||||
}
|
||||
for i := range tssLeft[0].Values {
|
||||
if !isAllNaNs(tssRight, i) {
|
||||
continue
|
||||
}
|
||||
for _, tsLeft := range tssLeft {
|
||||
tsLeft.Values[i] = nan
|
||||
// Add gaps to tssLeft if there are gaps at valuesRight.
|
||||
valuesRight := tssRight[0].Values
|
||||
for _, tsLeft := range tssLeft {
|
||||
valuesLeft := tsLeft.Values
|
||||
for i, v := range valuesRight {
|
||||
if math.IsNaN(v) {
|
||||
valuesLeft[i] = nan
|
||||
}
|
||||
}
|
||||
}
|
||||
tssLeft = removeNaNs(tssLeft)
|
||||
@@ -340,12 +342,14 @@ func binaryOpUnless(bfa *binaryOpFuncArg) ([]*timeseries, error) {
|
||||
rvs = append(rvs, tssLeft...)
|
||||
continue
|
||||
}
|
||||
for i := range tssLeft[0].Values {
|
||||
if isAllNaNs(tssRight, i) {
|
||||
continue
|
||||
}
|
||||
for _, tsLeft := range tssLeft {
|
||||
tsLeft.Values[i] = nan
|
||||
// Add gaps to tssLeft if there are no gaps at valuesRight.
|
||||
valuesRight := tssRight[0].Values
|
||||
for _, tsLeft := range tssLeft {
|
||||
valuesLeft := tsLeft.Values
|
||||
for i, v := range valuesRight {
|
||||
if !math.IsNaN(v) {
|
||||
valuesLeft[i] = nan
|
||||
}
|
||||
}
|
||||
}
|
||||
tssLeft = removeNaNs(tssLeft)
|
||||
@@ -354,15 +358,6 @@ func binaryOpUnless(bfa *binaryOpFuncArg) ([]*timeseries, error) {
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func isAllNaNs(tss []*timeseries, idx int) bool {
|
||||
for _, ts := range tss {
|
||||
if !math.IsNaN(ts.Values[idx]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func createTimeseriesMapByTagSet(be *metricsql.BinaryOpExpr, left, right []*timeseries) (map[string][]*timeseries, map[string][]*timeseries) {
|
||||
groupTags := be.GroupModifier.Args
|
||||
groupOp := strings.ToLower(be.GroupModifier.Op)
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
|
||||
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from the search")
|
||||
)
|
||||
|
||||
@@ -42,6 +43,11 @@ func ValidateMaxPointsPerTimeseries(start, end, step int64) error {
|
||||
//
|
||||
// See EvalConfig.mayCache for details.
|
||||
func AdjustStartEnd(start, end, step int64) (int64, int64) {
|
||||
if *disableCache {
|
||||
// Do not adjust start and end values when cache is disabled.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/563
|
||||
return start, end
|
||||
}
|
||||
points := (end-start)/step + 1
|
||||
if points < minTimeseriesPointsForTimeRounding {
|
||||
// Too small number of points for rounding.
|
||||
@@ -75,6 +81,9 @@ type EvalConfig struct {
|
||||
End int64
|
||||
Step int64
|
||||
|
||||
// QuotedRemoteAddr contains quoted remote address.
|
||||
QuotedRemoteAddr string
|
||||
|
||||
Deadline netstorage.Deadline
|
||||
|
||||
MayCache bool
|
||||
@@ -110,6 +119,9 @@ func (ec *EvalConfig) validate() {
|
||||
}
|
||||
|
||||
func (ec *EvalConfig) mayCache() bool {
|
||||
if *disableCache {
|
||||
return false
|
||||
}
|
||||
if !ec.MayCache {
|
||||
return false
|
||||
}
|
||||
@@ -160,14 +172,14 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
}
|
||||
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, me.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, me.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
if re, ok := e.(*metricsql.RollupExpr); ok {
|
||||
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, re.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, re.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -189,7 +201,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
}
|
||||
rv, err := tf(tfa)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, fe.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -203,7 +215,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
}
|
||||
rv, err := evalRollupFunc(ec, fe.Name, rf, e, re, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, fe.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -240,7 +252,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
}
|
||||
rv, err := af(afa)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, ae.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, ae.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -264,7 +276,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
}
|
||||
rv, err := bf(bfa)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %s`, be.AppendString(nil), err)
|
||||
return nil, fmt.Errorf(`cannot evaluate %q: %w`, be.AppendString(nil), err)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
@@ -375,7 +387,7 @@ func evalRollupFuncArgs(ec *EvalConfig, fe *metricsql.FuncExpr) ([]interface{},
|
||||
}
|
||||
ts, err := evalExpr(ec, arg)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot evaluate arg #%d for %q: %s", i+1, fe.AppendString(nil), err)
|
||||
return nil, nil, fmt.Errorf("cannot evaluate arg #%d for %q: %w", i+1, fe.AppendString(nil), err)
|
||||
}
|
||||
args[i] = ts
|
||||
}
|
||||
@@ -686,9 +698,10 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
|
||||
if !rml.Get(uint64(rollupMemorySize)) {
|
||||
rss.Cancel()
|
||||
return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
|
||||
"total available memory for concurrent requests: %d bytes; "+
|
||||
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
|
||||
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
|
||||
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
|
||||
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, float64(ec.Step)/1e3)
|
||||
}
|
||||
defer rml.Put(uint64(rollupMemorySize))
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
|
||||
defer func() {
|
||||
d := time.Since(startTime)
|
||||
if d >= *logSlowQueryDuration {
|
||||
logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%.3f seconds, start=%d, end=%d, step=%d, query=%q",
|
||||
logger.Warnf("slow query according to -search.logSlowQueryDuration=%s: duration=%.3f seconds, start=%d, end=%d, step=%d, query=%q",
|
||||
*logSlowQueryDuration, d.Seconds(), ec.Start/1000, ec.End/1000, ec.Step/1000, q)
|
||||
slowQueries.Inc()
|
||||
}
|
||||
@@ -40,7 +40,9 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
qid := activeQueriesV.Add(ec, q)
|
||||
rv, err := evalExpr(ec, e)
|
||||
activeQueriesV.Remove(qid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -131,10 +133,50 @@ func removeNaNs(tss []*timeseries) []*timeseries {
|
||||
return rvs
|
||||
}
|
||||
|
||||
func adjustCmpOps(e metricsql.Expr) metricsql.Expr {
|
||||
metricsql.VisitAll(e, func(expr metricsql.Expr) {
|
||||
be, ok := expr.(*metricsql.BinaryOpExpr)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if !metricsql.IsBinaryOpCmp(be.Op) {
|
||||
return
|
||||
}
|
||||
if _, ok := be.Left.(*metricsql.NumberExpr); !ok {
|
||||
return
|
||||
}
|
||||
// Convert 'num cmpOp query' expression to `query reverseCmpOp num` expression
|
||||
// like Prometheus does. For isntance, `0.5 < foo` must be converted to `foo > 0.5`
|
||||
// in order to return valid values for `foo` that are bigger than 0.5.
|
||||
be.Right, be.Left = be.Left, be.Right
|
||||
be.Op = getReverseCmpOp(be.Op)
|
||||
})
|
||||
return e
|
||||
}
|
||||
|
||||
// getReverseCmpOp returns the comparison operator, which must be used instead of op
// after swapping the operands of the comparison.
//
// Operators other than `>`, `<`, `>=` and `<=` are returned as is.
func getReverseCmpOp(op string) string {
	if op == ">" {
		return "<"
	}
	if op == "<" {
		return ">"
	}
	if op == ">=" {
		return "<="
	}
	if op == "<=" {
		return ">="
	}
	// `==` and `!=` are symmetric, so they don't need to be changed.
	return op
}
|
||||
|
||||
func parsePromQLWithCache(q string) (metricsql.Expr, error) {
|
||||
pcv := parseCacheV.Get(q)
|
||||
if pcv == nil {
|
||||
e, err := metricsql.Parse(q)
|
||||
if err == nil {
|
||||
e = adjustCmpOps(e)
|
||||
}
|
||||
pcv = &parseCacheValue{
|
||||
e: e,
|
||||
err: err,
|
||||
|
||||
@@ -21,7 +21,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: step,
|
||||
Deadline: netstorage.NewDeadline(time.Minute, ""),
|
||||
Deadline: netstorage.NewDeadline(time.Now(), time.Minute, ""),
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
result, err := Exec(ec, q, false)
|
||||
@@ -635,7 +635,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
Values: []float64{1000, 1200, 1400, 1400, 1400, 1400},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = []byte("foobar")
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
@@ -647,7 +646,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
Values: []float64{1000, 1200, 1400, 1400, 1400, 1400},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = []byte("foobar")
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
@@ -860,12 +858,23 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run("time()*-1^0.5", func(t *testing.T) {
|
||||
t.Run("time()*(-4)^0.5", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `time()*-1^0.5`
|
||||
q := `time()*(-4)^0.5`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run("time()*-4^0.5", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `time()*-4^0.5`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{-2000, -2400, -2800, -3200, -3600, -4000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`alias()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `alias(time(), "foobar")`
|
||||
@@ -1713,6 +1722,51 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`scalar < time()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `123 < time()`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`time() > scalar`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `time() > 123`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`scalar > time()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `123 > time()`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`time() < scalar`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `time() < 123`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`scalar1 < time() < scalar2`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `1300 < time() < 1700`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{nan, nan, 1400, 1600, nan, nan},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`a cmp scalar (leave MetricGroup)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort_desc((
|
||||
@@ -3002,6 +3056,102 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`buckets_limit(zero)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `buckets_limit(0, (
|
||||
alias(label_set(100, "le", "inf", "x", "y"), "metric"),
|
||||
alias(label_set(50, "le", "120", "x", "y"), "metric"),
|
||||
))`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`buckets_limit(unused)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(buckets_limit(5, (
|
||||
alias(label_set(100, "le", "inf", "x", "y"), "metric"),
|
||||
alias(label_set(50, "le", "120", "x", "y"), "metric"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{50, 50, 50, 50, 50, 50},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("metric")
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("120"),
|
||||
},
|
||||
{
|
||||
Key: []byte("x"),
|
||||
Value: []byte("y"),
|
||||
},
|
||||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{100, 100, 100, 100, 100, 100},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("metric")
|
||||
r2.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("inf"),
|
||||
},
|
||||
{
|
||||
Key: []byte("x"),
|
||||
Value: []byte("y"),
|
||||
},
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`buckets_limit(used)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(buckets_limit(2, (
|
||||
alias(label_set(100, "le", "inf", "x", "y"), "metric"),
|
||||
alias(label_set(98, "le", "300", "x", "y"), "metric"),
|
||||
alias(label_set(52, "le", "200", "x", "y"), "metric"),
|
||||
alias(label_set(50, "le", "120", "x", "y"), "metric"),
|
||||
alias(label_set(20, "le", "70", "x", "y"), "metric"),
|
||||
alias(label_set(10, "le", "30", "x", "y"), "metric"),
|
||||
alias(label_set(9, "le", "10", "x", "y"), "metric"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{52, 52, 52, 52, 52, 52},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("metric")
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("200"),
|
||||
},
|
||||
{
|
||||
Key: []byte("x"),
|
||||
Value: []byte("y"),
|
||||
},
|
||||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{100, 100, 100, 100, 100, 100},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("metric")
|
||||
r2.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("inf"),
|
||||
},
|
||||
{
|
||||
Key: []byte("x"),
|
||||
Value: []byte("y"),
|
||||
},
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`prometheus_buckets(missing-vmrange)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(prometheus_buckets((
|
||||
@@ -3220,6 +3370,28 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(multi-args)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum(1, 2, 3)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{6, 6, 6, 6, 6, 6},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(union-args)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum((1, 2, 3))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1, 1, 1, 1, 1, 1},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(scalar) by ()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum(123) by ()`
|
||||
@@ -3242,6 +3414,71 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`mode()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `mode((
|
||||
alias(3, "m1"),
|
||||
alias(2, "m2"),
|
||||
alias(3, "m3"),
|
||||
alias(4, "m4"),
|
||||
alias(3, "m5"),
|
||||
alias(2, "m6"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{3, 3, 3, 3, 3, 3},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`zscore()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort_by_label(round(zscore((
|
||||
label_set(time()/100+10, "k", "v1"),
|
||||
label_set(time()/200+5, "k", "v2"),
|
||||
label_set(time()/110-10, "k", "v3"),
|
||||
label_set(time()/90-5, "k", "v4"),
|
||||
)), 0.001), "k")`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1.482, 1.511, 1.535, 1.552, 1.564, 1.57},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("k"),
|
||||
Value: []byte("v1"),
|
||||
}}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.159, 0.058, -0.042, -0.141, -0.237, -0.329},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("k"),
|
||||
Value: []byte("v2"),
|
||||
}}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{-1.285, -1.275, -1.261, -1.242, -1.219, -1.193},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("k"),
|
||||
Value: []byte("v3"),
|
||||
}}
|
||||
r4 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{-0.356, -0.294, -0.232, -0.17, -0.108, -0.048},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r4.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("k"),
|
||||
Value: []byte("v4"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r1, r2, r3, r4}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`avg(scalar) without (xx, yy)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `avg without (xx, yy) (123)`
|
||||
@@ -3396,7 +3633,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
Values: []float64{6.8, 8.8, 10.9, 12.9, 14.9, 16.9},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = []byte("foobar")
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
@@ -3819,12 +4055,57 @@ func TestExecSuccess(t *testing.T) {
|
||||
})
|
||||
t.Run(`any()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `any(label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
q := `any(label_set(10, "__name__", "x", "foo", "bar") or label_set(time()/150, "__name__", "y", "baz", "sss"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10, 10, 10},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = []byte("x")
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("foo"),
|
||||
Value: []byte("bar"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`group() by (test)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `group((
|
||||
label_set(5, "__name__", "data", "test", "three samples", "point", "a"),
|
||||
label_set(6, "__name__", "data", "test", "three samples", "point", "b"),
|
||||
label_set(7, "__name__", "data", "test", "three samples", "point", "c"),
|
||||
)) by (test)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1, 1, 1, 1, 1, 1},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = nil
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("test"),
|
||||
Value: []byte("three samples"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`group() without (point)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `group((
|
||||
label_set(5, "__name__", "data", "test", "three samples", "point", "a"),
|
||||
label_set(6, "__name__", "data", "test", "three samples", "point", "b"),
|
||||
label_set(7, "__name__", "data", "test", "three samples", "point", "c"),
|
||||
)) without (point)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1, 1, 1, 1, 1, 1},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = nil
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("test"),
|
||||
Value: []byte("three samples"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
@@ -4103,6 +4384,44 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`interpolate()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `interpolate(label_set(time() < 1300 default time() > 1700, "__name__", "foobar", "x", "y"))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("foobar")
|
||||
r1.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("x"),
|
||||
Value: []byte("y"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r1}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`interpolate(tail)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `interpolate(time() < 1300)`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1200, 1200, 1200, 1200},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`interpolate(head)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `interpolate(time() > 1500)`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1600, 1600, 1600, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`distinct_over_time([500s])`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `distinct_over_time((time() < 1700)[500s])`
|
||||
@@ -4161,6 +4480,23 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`ifnot`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `time() ifnot time() > 1400`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, nan, nan, nan},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`ifnot-no-matching-timeseries`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `label_set(time(), "foo", "bar") ifnot label_set(time() > 1400, "x", "y")`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`quantile(-2)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `quantile(-2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
@@ -4408,6 +4744,50 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`mode_over_time()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `mode_over_time(round(time()/500)[100s:1s])`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{2, 2, 3, 3, 4, 4},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`rate_over_sum()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `rate_over_sum(round(time()/500)[100s:5s])`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.4, 0.4, 0.6, 0.6, 0.71, 0.8},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`zscore_over_time(rand)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `round(zscore_over_time(rand(0)[100s:10s]), 0.01)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{-1.17, -0.08, 0.98, 0.67, 1.61, 1.55},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`zscore_over_time(const)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `zscore_over_time(1[100s:10s])`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`integrate(1)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `integrate(1)`
|
||||
@@ -5494,7 +5874,7 @@ func TestExecError(t *testing.T) {
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Step: 100,
|
||||
Deadline: netstorage.NewDeadline(time.Minute, ""),
|
||||
Deadline: netstorage.NewDeadline(time.Now(), time.Minute, ""),
|
||||
}
|
||||
for i := 0; i < 4; i++ {
|
||||
rv, err := Exec(ec, q, false)
|
||||
@@ -5558,6 +5938,8 @@ func TestExecError(t *testing.T) {
|
||||
f(`sum()`)
|
||||
f(`count_values()`)
|
||||
f(`quantile()`)
|
||||
f(`any()`)
|
||||
f(`group()`)
|
||||
f(`topk()`)
|
||||
f(`topk_min()`)
|
||||
f(`topk_max()`)
|
||||
@@ -5592,9 +5974,9 @@ func TestExecError(t *testing.T) {
|
||||
f(`label_move()`)
|
||||
f(`median_over_time()`)
|
||||
f(`median()`)
|
||||
f(`median("foo", "bar")`)
|
||||
f(`keep_last_value()`)
|
||||
f(`keep_next_value()`)
|
||||
f(`interpolate()`)
|
||||
f(`distinct_over_time()`)
|
||||
f(`distinct()`)
|
||||
f(`alias()`)
|
||||
@@ -5616,6 +5998,14 @@ func TestExecError(t *testing.T) {
|
||||
f(`hoeffding_bound_upper(0.99, foo, 1)`)
|
||||
f(`outliersk()`)
|
||||
f(`outliersk(1)`)
|
||||
f(`mode_over_time()`)
|
||||
f(`rate_over_sum()`)
|
||||
f(`zscore_over_time()`)
|
||||
f(`mode()`)
|
||||
f(`zscore()`)
|
||||
f(`prometheus_buckets()`)
|
||||
f(`buckets_limit()`)
|
||||
f(`buckets_limit(1)`)
|
||||
|
||||
// Invalid argument type
|
||||
f(`median_over_time({}, 2)`)
|
||||
@@ -5685,7 +6075,6 @@ func TestExecError(t *testing.T) {
|
||||
) + 10`)
|
||||
|
||||
// Invalid aggregates
|
||||
f(`sum(1, 2)`)
|
||||
f(`sum(1) foo (bar)`)
|
||||
f(`sum foo () (bar)`)
|
||||
f(`sum(foo) by (1)`)
|
||||
|
||||
@@ -28,8 +28,8 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||
"deriv_fast": newRollupFuncOneArg(rollupDerivFast),
|
||||
"holt_winters": newRollupHoltWinters,
|
||||
"idelta": newRollupFuncOneArg(rollupIdelta),
|
||||
"increase": newRollupFuncOneArg(rollupIncrease), // + rollupFuncsRemoveCounterResets
|
||||
"irate": newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
|
||||
"increase": newRollupFuncOneArg(rollupDelta), // + rollupFuncsRemoveCounterResets
|
||||
"irate": newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
|
||||
"predict_linear": newRollupPredictLinear,
|
||||
"rate": newRollupFuncOneArg(rollupDerivFast), // + rollupFuncsRemoveCounterResets
|
||||
"resets": newRollupFuncOneArg(rollupResets),
|
||||
@@ -74,11 +74,17 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
|
||||
"ascent_over_time": newRollupFuncOneArg(rollupAscentOverTime),
|
||||
"descent_over_time": newRollupFuncOneArg(rollupDescentOverTime),
|
||||
"zscore_over_time": newRollupFuncOneArg(rollupZScoreOverTime),
|
||||
|
||||
// `timestamp` function must return timestamp for the last datapoint on the current window
|
||||
// in order to properly handle offset and timestamps unaligned to the current step.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details.
|
||||
"timestamp": newRollupFuncOneArg(rollupTimestamp),
|
||||
|
||||
// See https://en.wikipedia.org/wiki/Mode_(statistics)
|
||||
"mode_over_time": newRollupFuncOneArg(rollupModeOverTime),
|
||||
|
||||
"rate_over_sum": newRollupFuncOneArg(rollupRateOverSum),
|
||||
}
|
||||
|
||||
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
|
||||
@@ -89,7 +95,7 @@ var rollupAggrFuncs = map[string]rollupFunc{
|
||||
"deriv": rollupDerivSlow,
|
||||
"deriv_fast": rollupDerivFast,
|
||||
"idelta": rollupIdelta,
|
||||
"increase": rollupIncrease, // + rollupFuncsRemoveCounterResets
|
||||
"increase": rollupDelta, // + rollupFuncsRemoveCounterResets
|
||||
"irate": rollupIderiv, // + rollupFuncsRemoveCounterResets
|
||||
"rate": rollupDerivFast, // + rollupFuncsRemoveCounterResets
|
||||
"resets": rollupResets,
|
||||
@@ -120,7 +126,10 @@ var rollupAggrFuncs = map[string]rollupFunc{
|
||||
"tmax_over_time": rollupTmax,
|
||||
"ascent_over_time": rollupAscentOverTime,
|
||||
"descent_over_time": rollupDescentOverTime,
|
||||
"zscore_over_time": rollupZScoreOverTime,
|
||||
"timestamp": rollupTimestamp,
|
||||
"mode_over_time": rollupModeOverTime,
|
||||
"rate_over_sum": rollupRateOverSum,
|
||||
}
|
||||
|
||||
var rollupFuncsCannotAdjustWindow = map[string]bool{
|
||||
@@ -131,6 +140,7 @@ var rollupFuncsCannotAdjustWindow = map[string]bool{
|
||||
"increase": true,
|
||||
"predict_linear": true,
|
||||
"resets": true,
|
||||
"avg_over_time": true,
|
||||
"sum_over_time": true,
|
||||
"count_over_time": true,
|
||||
"quantile_over_time": true,
|
||||
@@ -145,6 +155,7 @@ var rollupFuncsCannotAdjustWindow = map[string]bool{
|
||||
"integrate": true,
|
||||
"ascent_over_time": true,
|
||||
"descent_over_time": true,
|
||||
"zscore_over_time": true,
|
||||
}
|
||||
|
||||
var rollupFuncsRemoveCounterResets = map[string]bool{
|
||||
@@ -157,12 +168,7 @@ var rollupFuncsRemoveCounterResets = map[string]bool{
|
||||
|
||||
var rollupFuncsKeepMetricGroup = map[string]bool{
|
||||
"default_rollup": true,
|
||||
"avg_over_time": true,
|
||||
"min_over_time": true,
|
||||
"max_over_time": true,
|
||||
"quantile_over_time": true,
|
||||
"rollup": true,
|
||||
"geomean_over_time": true,
|
||||
"hoeffding_bound_lower": true,
|
||||
"hoeffding_bound_upper": true,
|
||||
"first_over_time": true,
|
||||
@@ -285,7 +291,7 @@ func getRollupConfigs(name string, rf rollupFunc, expr metricsql.Expr, start, en
|
||||
case "aggr_over_time":
|
||||
aggrFuncNames, err := getRollupAggrFuncNames(expr)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("invalid args to %s: %s", expr.AppendString(nil), err)
|
||||
return nil, nil, fmt.Errorf("invalid args to %s: %w", expr.AppendString(nil), err)
|
||||
}
|
||||
for _, aggrFuncName := range aggrFuncNames {
|
||||
if rollupFuncsRemoveCounterResets[aggrFuncName] {
|
||||
@@ -316,11 +322,7 @@ type rollupFuncArg struct {
|
||||
|
||||
currTimestamp int64
|
||||
idx int
|
||||
step int64
|
||||
|
||||
// Real previous value even if it is located too far from the current window.
|
||||
// It matches prevValue if prevValue is not nan.
|
||||
realPrevValue float64
|
||||
window int64
|
||||
|
||||
tsm *timeseriesMap
|
||||
}
|
||||
@@ -332,8 +334,7 @@ func (rfa *rollupFuncArg) reset() {
|
||||
rfa.timestamps = nil
|
||||
rfa.currTimestamp = 0
|
||||
rfa.idx = 0
|
||||
rfa.step = 0
|
||||
rfa.realPrevValue = nan
|
||||
rfa.window = 0
|
||||
rfa.tsm = nil
|
||||
}
|
||||
|
||||
@@ -477,8 +478,7 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu
|
||||
}
|
||||
rfa := getRollupFuncArg()
|
||||
rfa.idx = 0
|
||||
rfa.step = rc.Step
|
||||
rfa.realPrevValue = nan
|
||||
rfa.window = window
|
||||
rfa.tsm = tsm
|
||||
|
||||
i := 0
|
||||
@@ -505,9 +505,6 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu
|
||||
rfa.values = values[i:j]
|
||||
rfa.timestamps = timestamps[i:j]
|
||||
rfa.currTimestamp = tEnd
|
||||
if i > 0 {
|
||||
rfa.realPrevValue = values[i-1]
|
||||
}
|
||||
value := rc.Func(rfa)
|
||||
rfa.idx++
|
||||
dstValues = append(dstValues, value)
|
||||
@@ -1077,6 +1074,28 @@ func rollupSum(rfa *rollupFuncArg) float64 {
|
||||
return sum
|
||||
}
|
||||
|
||||
func rollupRateOverSum(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
timestamps := rfa.timestamps
|
||||
if len(timestamps) == 0 {
|
||||
if math.IsNaN(rfa.prevValue) {
|
||||
return nan
|
||||
}
|
||||
// Assume that the value didn't change since rfa.prevValue.
|
||||
return 0
|
||||
}
|
||||
dt := rfa.window
|
||||
if !math.IsNaN(rfa.prevValue) {
|
||||
dt = timestamps[len(timestamps)-1] - rfa.prevTimestamp
|
||||
}
|
||||
sum := float64(0)
|
||||
for _, v := range rfa.values {
|
||||
sum += v
|
||||
}
|
||||
return sum / (float64(dt) / 1e3)
|
||||
}
|
||||
|
||||
func rollupRange(rfa *rollupFuncArg) float64 {
|
||||
max := rollupMax(rfa)
|
||||
min := rollupMin(rfa)
|
||||
@@ -1165,14 +1184,6 @@ func rollupStdvar(rfa *rollupFuncArg) float64 {
|
||||
}
|
||||
|
||||
func rollupDelta(rfa *rollupFuncArg) float64 {
|
||||
return rollupDeltaInternal(rfa, false)
|
||||
}
|
||||
|
||||
func rollupIncrease(rfa *rollupFuncArg) float64 {
|
||||
return rollupDeltaInternal(rfa, true)
|
||||
}
|
||||
|
||||
func rollupDeltaInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
values := rfa.values
|
||||
@@ -1182,17 +1193,23 @@ func rollupDeltaInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
|
||||
return nan
|
||||
}
|
||||
// Assume that the previous non-existing value was 0
|
||||
// only if the first value is quite small.
|
||||
// only if the first value doesn't exceed too much the delta with the next value.
|
||||
//
|
||||
// This should prevent from improper increase() results for os-level counters
|
||||
// such as cpu time or bytes sent over the network interface.
|
||||
// These counters may start long ago before the first value appears in the db.
|
||||
if values[0] < 1e6 {
|
||||
//
|
||||
// This also should prevent from improper increase() results when a part of label values are changed
|
||||
// without counter reset.
|
||||
d := float64(10)
|
||||
if len(values) > 1 {
|
||||
d = values[1] - values[0]
|
||||
}
|
||||
if math.Abs(values[0]) < 10*(math.Abs(d)+1) {
|
||||
prevValue = 0
|
||||
if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
|
||||
prevValue = rfa.realPrevValue
|
||||
}
|
||||
} else {
|
||||
prevValue = values[0]
|
||||
values = values[1:]
|
||||
}
|
||||
}
|
||||
if len(values) == 0 {
|
||||
@@ -1459,7 +1476,7 @@ func getCandlestickValues(rfa *rollupFuncArg) []float64 {
|
||||
}
|
||||
|
||||
func getFirstValueForCandlestick(rfa *rollupFuncArg) float64 {
|
||||
if rfa.prevTimestamp+rfa.step >= rfa.currTimestamp {
|
||||
if rfa.prevTimestamp+rfa.window >= rfa.currTimestamp {
|
||||
return rfa.prevValue
|
||||
}
|
||||
return nan
|
||||
@@ -1534,6 +1551,12 @@ func rollupTimestamp(rfa *rollupFuncArg) float64 {
|
||||
return float64(timestamps[len(timestamps)-1]) / 1e3
|
||||
}
|
||||
|
||||
func rollupModeOverTime(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
return modeNoNaNs(rfa.prevValue, rfa.values)
|
||||
}
|
||||
|
||||
func rollupAscentOverTime(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
@@ -1580,6 +1603,20 @@ func rollupDescentOverTime(rfa *rollupFuncArg) float64 {
|
||||
return s
|
||||
}
|
||||
|
||||
func rollupZScoreOverTime(rfa *rollupFuncArg) float64 {
|
||||
// See https://about.gitlab.com/blog/2019/07/23/anomaly-detection-using-prometheus/#using-z-score-for-anomaly-detection
|
||||
scrapeInterval := rollupScrapeInterval(rfa)
|
||||
lag := rollupLag(rfa)
|
||||
if math.IsNaN(scrapeInterval) || math.IsNaN(lag) || lag > scrapeInterval {
|
||||
return nan
|
||||
}
|
||||
d := rollupLast(rfa) - rollupAvg(rfa)
|
||||
if d == 0 {
|
||||
return 0
|
||||
}
|
||||
return d / rollupStddev(rfa)
|
||||
}
|
||||
|
||||
func rollupFirst(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
|
||||
cacheTimestampOffset = flag.Duration("search.cacheTimestampOffset", 5*time.Minute, "The maximum duration since the current time for response data, "+
|
||||
"which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses "+
|
||||
"due to time synchronization issues between VictoriaMetrics and data sources")
|
||||
@@ -137,7 +136,7 @@ func ResetRollupResultCache() {
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
|
||||
if *disableCache || !ec.mayCache() {
|
||||
if !ec.mayCache() {
|
||||
return nil, ec.Start
|
||||
}
|
||||
|
||||
@@ -218,7 +217,7 @@ func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window in
|
||||
var resultBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func (rrc *rollupResultCache) Put(ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
|
||||
if *disableCache || len(tss) == 0 || !ec.mayCache() {
|
||||
if len(tss) == 0 || !ec.mayCache() {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -286,7 +285,7 @@ var (
|
||||
var buf [8]byte
|
||||
if _, err := rand.Read(buf[:]); err != nil {
|
||||
// do not use logger.Panicf, since it isn't initialized yet.
|
||||
panic(fmt.Errorf("FATAL: cannot read random data for rollupResultCacheKeyPrefix: %s", err))
|
||||
panic(fmt.Errorf("FATAL: cannot read random data for rollupResultCacheKeyPrefix: %w", err))
|
||||
}
|
||||
return encoding.UnmarshalUint64(buf[:])
|
||||
}()
|
||||
@@ -414,7 +413,7 @@ func (mi *rollupResultCacheMetainfo) Unmarshal(src []byte) error {
|
||||
for i := 0; i < entriesLen; i++ {
|
||||
tail, err := mi.entries[i].Unmarshal(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal entry #%d: %s", i, err)
|
||||
return fmt.Errorf("cannot unmarshal entry #%d: %w", i, err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
|
||||
@@ -174,6 +174,7 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
|
||||
rfa.prevTimestamp = 0
|
||||
rfa.values = append(rfa.values, testValues...)
|
||||
rfa.timestamps = append(rfa.timestamps, testTimestamps...)
|
||||
rfa.window = rfa.timestamps[len(rfa.timestamps)-1] - rfa.timestamps[0]
|
||||
if rollupFuncsRemoveCounterResets[funcName] {
|
||||
removeCounterResets(rfa.values)
|
||||
}
|
||||
@@ -391,7 +392,10 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
|
||||
f("increases_over_time", 5)
|
||||
f("ascent_over_time", 142)
|
||||
f("descent_over_time", 231)
|
||||
f("zscore_over_time", -0.4254336383156416)
|
||||
f("timestamp", 0.13)
|
||||
f("mode_over_time", 34)
|
||||
f("rate_over_sum", 4520)
|
||||
}
|
||||
|
||||
func TestRollupNewRollupFuncError(t *testing.T) {
|
||||
@@ -952,6 +956,48 @@ func TestRollupFuncsNoWindow(t *testing.T) {
|
||||
timestampsExpected := []int64{0, 40, 80, 120, 160}
|
||||
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
t.Run("mode_over_time", func(t *testing.T) {
|
||||
rc := rollupConfig{
|
||||
Func: rollupModeOverTime,
|
||||
Start: 0,
|
||||
End: 160,
|
||||
Step: 40,
|
||||
Window: 80,
|
||||
}
|
||||
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
|
||||
values := rc.Do(nil, testValues, testTimestamps)
|
||||
valuesExpected := []float64{nan, nan, 34, 44, 44}
|
||||
timestampsExpected := []int64{0, 40, 80, 120, 160}
|
||||
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
t.Run("rate_over_sum", func(t *testing.T) {
|
||||
rc := rollupConfig{
|
||||
Func: rollupRateOverSum,
|
||||
Start: 0,
|
||||
End: 160,
|
||||
Step: 40,
|
||||
Window: 80,
|
||||
}
|
||||
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
|
||||
values := rc.Do(nil, testValues, testTimestamps)
|
||||
valuesExpected := []float64{nan, 1262.5, 3187.5, 4059.523809523809, 6200}
|
||||
timestampsExpected := []int64{0, 40, 80, 120, 160}
|
||||
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
t.Run("zscore_over_time", func(t *testing.T) {
|
||||
rc := rollupConfig{
|
||||
Func: rollupZScoreOverTime,
|
||||
Start: 0,
|
||||
End: 160,
|
||||
Step: 40,
|
||||
Window: 80,
|
||||
}
|
||||
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
|
||||
values := rc.Do(nil, testValues, testTimestamps)
|
||||
valuesExpected := []float64{nan, 0.9397878236968458, 1.1969836716333457, 2.3112921116373175, nan}
|
||||
timestampsExpected := []int64{0, 40, 80, 120, 160}
|
||||
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRollupBigNumberOfValues(t *testing.T) {
|
||||
@@ -1009,3 +1055,41 @@ func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExp
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRollupDelta(t *testing.T) {
|
||||
f := func(prevValue float64, values []float64, resultExpected float64) {
|
||||
t.Helper()
|
||||
rfa := &rollupFuncArg{
|
||||
prevValue: prevValue,
|
||||
values: values,
|
||||
}
|
||||
result := rollupDelta(rfa)
|
||||
if math.IsNaN(result) {
|
||||
if !math.IsNaN(resultExpected) {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
return
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
}
|
||||
f(nan, nil, nan)
|
||||
|
||||
// Small initial value
|
||||
f(nan, []float64{1}, 1)
|
||||
f(nan, []float64{10}, 10)
|
||||
f(nan, []float64{100}, 100)
|
||||
f(nan, []float64{1, 2, 3}, 3)
|
||||
f(1, []float64{1, 2, 3}, 2)
|
||||
f(nan, []float64{5, 6, 8}, 8)
|
||||
f(2, []float64{5, 6, 8}, 6)
|
||||
|
||||
// Too big initial value must be skipped.
|
||||
f(nan, []float64{1000}, 0)
|
||||
f(nan, []float64{1000, 1001, 1002}, 2)
|
||||
|
||||
// Empty values
|
||||
f(1, nil, 0)
|
||||
f(100, nil, 0)
|
||||
}
|
||||
|
||||
@@ -217,7 +217,7 @@ func (ts *timeseries) unmarshalFastNoTimestamps(src []byte) ([]byte, error) {
|
||||
|
||||
tail, err := unmarshalMetricNameFast(&ts.MetricName, src)
|
||||
if err != nil {
|
||||
return tail, fmt.Errorf("cannot unmarshal MetricName: %s", err)
|
||||
return tail, fmt.Errorf("cannot unmarshal MetricName: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
@@ -275,7 +275,7 @@ func unmarshalMetricNameFast(mn *storage.MetricName, src []byte) ([]byte, error)
|
||||
|
||||
tail, metricGroup, err := unmarshalBytesFast(src)
|
||||
if err != nil {
|
||||
return tail, fmt.Errorf("cannot unmarshal MetricGroup: %s", err)
|
||||
return tail, fmt.Errorf("cannot unmarshal MetricGroup: %w", err)
|
||||
}
|
||||
src = tail
|
||||
mn.MetricGroup = metricGroup[:len(metricGroup):len(metricGroup)]
|
||||
@@ -292,13 +292,13 @@ func unmarshalMetricNameFast(mn *storage.MetricName, src []byte) ([]byte, error)
|
||||
for i := range mn.Tags {
|
||||
tail, key, err := unmarshalBytesFast(src)
|
||||
if err != nil {
|
||||
return tail, fmt.Errorf("cannot unmarshal key for tag[%d]: %s", i, err)
|
||||
return tail, fmt.Errorf("cannot unmarshal key for tag[%d]: %w", i, err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
tail, value, err := unmarshalBytesFast(src)
|
||||
if err != nil {
|
||||
return tail, fmt.Errorf("cannot unmarshal value for tag[%d]: %s", i, err)
|
||||
return tail, fmt.Errorf("cannot unmarshal value for tag[%d]: %w", i, err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
|
||||
@@ -17,14 +17,6 @@ import (
|
||||
"github.com/valyala/histogram"
|
||||
)
|
||||
|
||||
var transformFuncsKeepMetricGroup = map[string]bool{
|
||||
"ceil": true,
|
||||
"clamp_max": true,
|
||||
"clamp_min": true,
|
||||
"floor": true,
|
||||
"round": true,
|
||||
}
|
||||
|
||||
var transformFuncs = map[string]transformFunc{
|
||||
// Standard promql funcs
|
||||
// See funcs accepting instant-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
|
||||
@@ -72,6 +64,7 @@ var transformFuncs = map[string]transformFunc{
|
||||
"": transformUnion, // empty func is a synonym to union
|
||||
"keep_last_value": transformKeepLastValue,
|
||||
"keep_next_value": transformKeepNextValue,
|
||||
"interpolate": transformInterpolate,
|
||||
"start": newTransformFuncZeroArgs(transformStart),
|
||||
"end": newTransformFuncZeroArgs(transformEnd),
|
||||
"step": newTransformFuncZeroArgs(transformStep),
|
||||
@@ -97,6 +90,7 @@ var transformFuncs = map[string]transformFunc{
|
||||
"asin": newTransformFuncOneArg(transformAsin),
|
||||
"acos": newTransformFuncOneArg(transformAcos),
|
||||
"prometheus_buckets": transformPrometheusBuckets,
|
||||
"buckets_limit": transformBucketsLimit,
|
||||
"histogram_share": transformHistogramShare,
|
||||
"sort_by_label": newTransformFuncSortByLabel(false),
|
||||
"sort_by_label_desc": newTransformFuncSortByLabel(true),
|
||||
@@ -131,12 +125,8 @@ func newTransformFuncOneArg(tf func(v float64) float64) transformFunc {
|
||||
}
|
||||
|
||||
func doTransformValues(arg []*timeseries, tf func(values []float64), fe *metricsql.FuncExpr) ([]*timeseries, error) {
|
||||
name := strings.ToLower(fe.Name)
|
||||
keepMetricGroup := transformFuncsKeepMetricGroup[name]
|
||||
for _, ts := range arg {
|
||||
if !keepMetricGroup {
|
||||
ts.MetricName.ResetMetricGroup()
|
||||
}
|
||||
ts.MetricName.ResetMetricGroup()
|
||||
tf(ts.Values)
|
||||
}
|
||||
return arg, nil
|
||||
@@ -281,6 +271,101 @@ func transformFloor(v float64) float64 {
|
||||
return math.Floor(v)
|
||||
}
|
||||
|
||||
func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
limits, err := getScalar(args[0], 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
limit := int(limits[0])
|
||||
if limit <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
tss := vmrangeBucketsToLE(args[1])
|
||||
if len(tss) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Group timeseries by all MetricGroup+tags excluding `le` tag.
|
||||
type x struct {
|
||||
le float64
|
||||
hits float64
|
||||
ts *timeseries
|
||||
}
|
||||
m := make(map[string][]x)
|
||||
var b []byte
|
||||
var mn storage.MetricName
|
||||
for _, ts := range tss {
|
||||
leStr := ts.MetricName.GetTagValue("le")
|
||||
if len(leStr) == 0 {
|
||||
// Skip time series without `le` tag.
|
||||
continue
|
||||
}
|
||||
le, err := strconv.ParseFloat(string(leStr), 64)
|
||||
if err != nil {
|
||||
// Skip time series with invalid `le` tag.
|
||||
continue
|
||||
}
|
||||
mn.CopyFrom(&ts.MetricName)
|
||||
mn.RemoveTag("le")
|
||||
b = marshalMetricNameSorted(b[:0], &mn)
|
||||
m[string(b)] = append(m[string(b)], x{
|
||||
le: le,
|
||||
ts: ts,
|
||||
})
|
||||
}
|
||||
|
||||
// Remove buckets with the smallest counters.
|
||||
rvs := make([]*timeseries, 0, len(tss))
|
||||
for _, leGroup := range m {
|
||||
if len(leGroup) <= limit {
|
||||
// Fast path - the number of buckets doesn't exceed the given limit.
|
||||
// Keep all the buckets as is.
|
||||
for _, xx := range leGroup {
|
||||
rvs = append(rvs, xx.ts)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Slow path - remove buckets with the smallest number of hits until their count reaches the limit.
|
||||
|
||||
// Calculate per-bucket hits.
|
||||
sort.Slice(leGroup, func(i, j int) bool {
|
||||
return leGroup[i].le < leGroup[j].le
|
||||
})
|
||||
for n := range limits {
|
||||
prevValue := float64(0)
|
||||
for i := range leGroup {
|
||||
xx := &leGroup[i]
|
||||
value := xx.ts.Values[n]
|
||||
xx.hits += value - prevValue
|
||||
prevValue = value
|
||||
}
|
||||
}
|
||||
for len(leGroup) > limit {
|
||||
xxMinIdx := 0
|
||||
for i, xx := range leGroup {
|
||||
if xx.hits < leGroup[xxMinIdx].hits {
|
||||
xxMinIdx = i
|
||||
}
|
||||
}
|
||||
// Merge the leGroup[xxMinIdx] bucket with the smallest adjacent bucket in order to preserve
|
||||
// the maximum accuracy.
|
||||
if xxMinIdx+1 == len(leGroup) || (xxMinIdx > 0 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits) {
|
||||
xxMinIdx--
|
||||
}
|
||||
leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
|
||||
leGroup = append(leGroup[:xxMinIdx], leGroup[xxMinIdx+1:]...)
|
||||
}
|
||||
for _, xx := range leGroup {
|
||||
rvs = append(rvs, xx.ts)
|
||||
}
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformPrometheusBuckets(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
@@ -414,7 +499,7 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
les, err := getScalar(args[0], 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse le: %s", err)
|
||||
return nil, fmt.Errorf("cannot parse le: %w", err)
|
||||
}
|
||||
|
||||
// Convert buckets with `vmrange` labels to buckets with `le` labels.
|
||||
@@ -425,7 +510,7 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
if len(args) > 2 {
|
||||
s, err := getString(args[2], 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %s", err)
|
||||
return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %w", err)
|
||||
}
|
||||
boundsLabel = s
|
||||
}
|
||||
@@ -513,7 +598,7 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
phis, err := getScalar(args[0], 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse phi: %s", err)
|
||||
return nil, fmt.Errorf("cannot parse phi: %w", err)
|
||||
}
|
||||
|
||||
// Convert buckets with `vmrange` labels to buckets with `le` labels.
|
||||
@@ -524,7 +609,7 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
if len(args) > 2 {
|
||||
s, err := getString(args[2], 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %s", err)
|
||||
return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %w", err)
|
||||
}
|
||||
boundsLabel = s
|
||||
}
|
||||
@@ -764,6 +849,52 @@ func transformKeepNextValue(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformInterpolate(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rvs := args[0]
|
||||
for _, ts := range rvs {
|
||||
values := ts.Values
|
||||
if len(values) == 0 {
|
||||
continue
|
||||
}
|
||||
prevValue := nan
|
||||
var nextValue float64
|
||||
for i := 0; i < len(values); i++ {
|
||||
if !math.IsNaN(values[i]) {
|
||||
continue
|
||||
}
|
||||
if i > 0 {
|
||||
prevValue = values[i-1]
|
||||
}
|
||||
j := i + 1
|
||||
for j < len(values) {
|
||||
if !math.IsNaN(values[j]) {
|
||||
break
|
||||
}
|
||||
j++
|
||||
}
|
||||
if j >= len(values) {
|
||||
nextValue = prevValue
|
||||
} else {
|
||||
nextValue = values[j]
|
||||
}
|
||||
if math.IsNaN(prevValue) {
|
||||
prevValue = nextValue
|
||||
}
|
||||
delta := (nextValue - prevValue) / float64(j-i+1)
|
||||
for i < j {
|
||||
prevValue += delta
|
||||
values[i] = prevValue
|
||||
i++
|
||||
}
|
||||
}
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func newTransformFuncRunning(rf func(a, b float64, idx int) float64) transformFunc {
|
||||
return func(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
@@ -1034,7 +1165,7 @@ func transformLabelMap(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
label, err := getString(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read label name: %s", err)
|
||||
return nil, fmt.Errorf("cannot read label name: %w", err)
|
||||
}
|
||||
srcValues, dstValues, err := getStringPairs(args[2:])
|
||||
if err != nil {
|
||||
@@ -1179,7 +1310,7 @@ func transformLabelTransform(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
|
||||
r, err := metricsql.CompileRegexp(regex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
|
||||
return nil, fmt.Errorf(`cannot compile regex %q: %w`, regex, err)
|
||||
}
|
||||
return labelReplace(args[0], label, r, label, replacement)
|
||||
}
|
||||
@@ -1208,7 +1339,7 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
|
||||
r, err := metricsql.CompileRegexpAnchored(regex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
|
||||
return nil, fmt.Errorf(`cannot compile regex %q: %w`, regex, err)
|
||||
}
|
||||
return labelReplace(args[0], srcLabel, r, dstLabel, replacement)
|
||||
}
|
||||
@@ -1238,7 +1369,7 @@ func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
labelName, err := getString(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get label name: %s", err)
|
||||
return nil, fmt.Errorf("cannot get label name: %w", err)
|
||||
}
|
||||
rvs := args[0]
|
||||
for _, ts := range rvs {
|
||||
@@ -1265,15 +1396,15 @@ func transformLabelMatch(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
labelName, err := getString(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get label name: %s", err)
|
||||
return nil, fmt.Errorf("cannot get label name: %w", err)
|
||||
}
|
||||
labelRe, err := getString(args[2], 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get regexp: %s", err)
|
||||
return nil, fmt.Errorf("cannot get regexp: %w", err)
|
||||
}
|
||||
r, err := metricsql.CompileRegexpAnchored(labelRe)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
|
||||
return nil, fmt.Errorf(`cannot compile regexp %q: %w`, labelRe, err)
|
||||
}
|
||||
tss := args[0]
|
||||
rvs := tss[:0]
|
||||
@@ -1293,15 +1424,15 @@ func transformLabelMismatch(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
labelName, err := getString(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get label name: %s", err)
|
||||
return nil, fmt.Errorf("cannot get label name: %w", err)
|
||||
}
|
||||
labelRe, err := getString(args[2], 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get regexp: %s", err)
|
||||
return nil, fmt.Errorf("cannot get regexp: %w", err)
|
||||
}
|
||||
r, err := metricsql.CompileRegexpAnchored(labelRe)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
|
||||
return nil, fmt.Errorf(`cannot compile regexp %q: %w`, labelRe, err)
|
||||
}
|
||||
tss := args[0]
|
||||
rvs := tss[:0]
|
||||
@@ -1401,7 +1532,7 @@ func newTransformFuncSortByLabel(isDesc bool) transformFunc {
|
||||
}
|
||||
label, err := getString(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse label name for sorting: %s", err)
|
||||
return nil, fmt.Errorf("cannot parse label name for sorting: %w", err)
|
||||
}
|
||||
rvs := args[0]
|
||||
sort.SliceStable(rvs, func(i, j int) bool {
|
||||
@@ -1554,11 +1685,16 @@ func transformEnd(tfa *transformFuncArg) float64 {
|
||||
return float64(tfa.ec.End) * 1e-3
|
||||
}
|
||||
|
||||
// copyTimeseriesMetricNames returns a copy of arg with real copy of MetricNames,
|
||||
// but with shallow copy of Timestamps and Values.
|
||||
func copyTimeseriesMetricNames(arg []*timeseries) []*timeseries {
|
||||
rvs := make([]*timeseries, len(arg))
|
||||
for i, src := range arg {
|
||||
// copyTimeseriesMetricNames returns a copy of tss with real copy of MetricNames,
|
||||
// but with shallow copy of Timestamps and Values if makeCopy is set.
|
||||
//
|
||||
// Otherwise tss is returned.
|
||||
func copyTimeseriesMetricNames(tss []*timeseries, makeCopy bool) []*timeseries {
|
||||
if !makeCopy {
|
||||
return tss
|
||||
}
|
||||
rvs := make([]*timeseries, len(tss))
|
||||
for i, src := range tss {
|
||||
var dst timeseries
|
||||
dst.CopyFromMetricNames(src)
|
||||
rvs[i] = &dst
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -28,8 +29,27 @@ var (
|
||||
|
||||
bigMergeConcurrency = flag.Int("bigMergeConcurrency", 0, "The maximum number of CPU cores to use for big merges. Default value is used if set to 0")
|
||||
smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of CPU cores to use for small merges. Default value is used if set to 0")
|
||||
|
||||
denyQueriesOutsideRetention = flag.Bool("denyQueriesOutsideRetention", false, "Whether to deny queries outside of the configured -retentionPeriod. "+
|
||||
"When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. "+
|
||||
"This may be useful when multiple data sources with distinct retentions are hidden behind query-tee")
|
||||
)
|
||||
|
||||
// CheckTimeRange returns an error if the given tr is denied for querying.
|
||||
func CheckTimeRange(tr storage.TimeRange) error {
|
||||
if !*denyQueriesOutsideRetention {
|
||||
return nil
|
||||
}
|
||||
minAllowedTimestamp := (int64(fasttime.UnixTimestamp()) - int64(*retentionPeriod)*3600*24*30) * 1000
|
||||
if tr.MinTimestamp > minAllowedTimestamp {
|
||||
return nil
|
||||
}
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("the given time range %s is outside the allowed retention of %d months according to -denyQueriesOutsideRetention", &tr, *retentionPeriod),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes vmstorage.
|
||||
func Init() {
|
||||
InitWithoutMetrics()
|
||||
@@ -97,41 +117,41 @@ func DeleteMetrics(tfss []*storage.TagFilters) (int, error) {
|
||||
}
|
||||
|
||||
// SearchTagKeys searches for tag keys
|
||||
func SearchTagKeys(maxTagKeys int) ([]string, error) {
|
||||
func SearchTagKeys(maxTagKeys int, deadline uint64) ([]string, error) {
|
||||
WG.Add(1)
|
||||
keys, err := Storage.SearchTagKeys(maxTagKeys)
|
||||
keys, err := Storage.SearchTagKeys(maxTagKeys, deadline)
|
||||
WG.Done()
|
||||
return keys, err
|
||||
}
|
||||
|
||||
// SearchTagValues searches for tag values for the given tagKey
|
||||
func SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
|
||||
func SearchTagValues(tagKey []byte, maxTagValues int, deadline uint64) ([]string, error) {
|
||||
WG.Add(1)
|
||||
values, err := Storage.SearchTagValues(tagKey, maxTagValues)
|
||||
values, err := Storage.SearchTagValues(tagKey, maxTagValues, deadline)
|
||||
WG.Done()
|
||||
return values, err
|
||||
}
|
||||
|
||||
// SearchTagEntries searches for tag entries.
|
||||
func SearchTagEntries(maxTagKeys, maxTagValues int) ([]storage.TagEntry, error) {
|
||||
func SearchTagEntries(maxTagKeys, maxTagValues int, deadline uint64) ([]storage.TagEntry, error) {
|
||||
WG.Add(1)
|
||||
tagEntries, err := Storage.SearchTagEntries(maxTagKeys, maxTagValues)
|
||||
tagEntries, err := Storage.SearchTagEntries(maxTagKeys, maxTagValues, deadline)
|
||||
WG.Done()
|
||||
return tagEntries, err
|
||||
}
|
||||
|
||||
// GetTSDBStatusForDate returns TSDB status for the given date.
|
||||
func GetTSDBStatusForDate(date uint64, topN int) (*storage.TSDBStatus, error) {
|
||||
func GetTSDBStatusForDate(date uint64, topN int, deadline uint64) (*storage.TSDBStatus, error) {
|
||||
WG.Add(1)
|
||||
status, err := Storage.GetTSDBStatusForDate(date, topN)
|
||||
status, err := Storage.GetTSDBStatusForDate(date, topN, deadline)
|
||||
WG.Done()
|
||||
return status, err
|
||||
}
|
||||
|
||||
// GetSeriesCount returns the number of time series in the storage.
|
||||
func GetSeriesCount() (uint64, error) {
|
||||
func GetSeriesCount(deadline uint64) (uint64, error) {
|
||||
WG.Add(1)
|
||||
n, err := Storage.GetSeriesCount()
|
||||
n, err := Storage.GetSeriesCount(deadline)
|
||||
WG.Done()
|
||||
return n, err
|
||||
}
|
||||
@@ -161,7 +181,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
authKey := r.FormValue("authKey")
|
||||
if authKey != *snapshotAuthKey {
|
||||
httpserver.Errorf(w, "invalid authKey %q. It must match the value from -snapshotAuthKey command line flag", authKey)
|
||||
httpserver.Errorf(w, r, "invalid authKey %q. It must match the value from -snapshotAuthKey command line flag", authKey)
|
||||
return true
|
||||
}
|
||||
path = path[len("/snapshot"):]
|
||||
@@ -171,7 +191,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotPath, err := Storage.CreateSnapshot()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot create snapshot: %s", err)
|
||||
err = fmt.Errorf("cannot create snapshot: %w", err)
|
||||
jsonResponseError(w, err)
|
||||
return true
|
||||
}
|
||||
@@ -185,7 +205,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots, err := Storage.ListSnapshots()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot list snapshots: %s", err)
|
||||
err = fmt.Errorf("cannot list snapshots: %w", err)
|
||||
jsonResponseError(w, err)
|
||||
return true
|
||||
}
|
||||
@@ -202,7 +222,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotName := r.FormValue("snapshot")
|
||||
if err := Storage.DeleteSnapshot(snapshotName); err != nil {
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %s", snapshotName, err)
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %w", snapshotName, err)
|
||||
jsonResponseError(w, err)
|
||||
return true
|
||||
}
|
||||
@@ -212,13 +232,13 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots, err := Storage.ListSnapshots()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot list snapshots: %s", err)
|
||||
err = fmt.Errorf("cannot list snapshots: %w", err)
|
||||
jsonResponseError(w, err)
|
||||
return true
|
||||
}
|
||||
for _, snapshotName := range snapshots {
|
||||
if err := Storage.DeleteSnapshot(snapshotName); err != nil {
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %s", snapshotName, err)
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %w", snapshotName, err)
|
||||
jsonResponseError(w, err)
|
||||
return true
|
||||
}
|
||||
@@ -409,6 +429,23 @@ func registerStorageMetrics() {
|
||||
return float64(m().AddRowsConcurrencyCurrent)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_concurrent_search_tsids_limit_reached_total`, func() float64 {
|
||||
return float64(m().SearchTSIDsConcurrencyLimitReached)
|
||||
})
|
||||
metrics.NewGauge(`vm_concurrent_search_tsids_limit_timeout_total`, func() float64 {
|
||||
return float64(m().SearchTSIDsConcurrencyLimitTimeout)
|
||||
})
|
||||
metrics.NewGauge(`vm_concurrent_search_tsids_capacity`, func() float64 {
|
||||
return float64(m().SearchTSIDsConcurrencyCapacity)
|
||||
})
|
||||
metrics.NewGauge(`vm_concurrent_search_tsids_current`, func() float64 {
|
||||
return float64(m().SearchTSIDsConcurrencyCurrent)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_search_delays_total`, func() float64 {
|
||||
return float64(m().SearchDelays)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_slow_row_inserts_total`, func() float64 {
|
||||
return float64(m().SlowRowInserts)
|
||||
})
|
||||
@@ -486,7 +523,7 @@ func registerStorageMetrics() {
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/regexps"}`, func() float64 {
|
||||
return float64(storage.RegexpCacheSize())
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_size_entries{type="storage/prefetchedMetricIDs"}`, func() float64 {
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/prefetchedMetricIDs"}`, func() float64 {
|
||||
return float64(m().PrefetchedMetricIDsSize)
|
||||
})
|
||||
|
||||
|
||||
@@ -1,20 +1,11 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
}
|
||||
],
|
||||
"__inputs": [],
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "7.0.2"
|
||||
"version": "7.0.3"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
@@ -65,7 +56,7 @@
|
||||
"gnetId": 10229,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"iteration": 1591986157550,
|
||||
"iteration": 1593345560631,
|
||||
"links": [
|
||||
{
|
||||
"icon": "doc",
|
||||
@@ -96,7 +87,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@@ -110,7 +101,7 @@
|
||||
},
|
||||
{
|
||||
"content": "<div style=\"text-align: center; font-size: 2em\">$version</div>",
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -500,7 +491,7 @@
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@@ -1144,7 +1135,7 @@
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@@ -2178,7 +2169,7 @@
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@@ -2486,7 +2477,7 @@
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
@@ -3207,9 +3198,9 @@
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"selected": false,
|
||||
"text": "VictoriaMetrics",
|
||||
"value": "VictoriaMetrics"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
@@ -3228,14 +3219,14 @@
|
||||
"allValue": null,
|
||||
"current": {},
|
||||
"datasource": "$datasource",
|
||||
"definition": "label_values(vm_app_version, job)",
|
||||
"definition": "label_values(vm_app_version{version=~\"victoria-metrics-.*\"}, job)",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [],
|
||||
"query": "label_values(vm_app_version, job)",
|
||||
"query": "label_values(vm_app_version{version=~\"victoria-metrics-.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
@@ -3301,5 +3292,5 @@
|
||||
"timezone": "",
|
||||
"title": "VictoriaMetrics",
|
||||
"uid": "wNf0q_kZk",
|
||||
"version": 3
|
||||
"version": 1
|
||||
}
|
||||
3537
dashboards/vmagent.json
Normal file
3537
dashboards/vmagent.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ DOCKER_NAMESPACE := victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= alpine:3.12
|
||||
CERTS_IMAGE := alpine:3.12
|
||||
GO_BUILDER_IMAGE := golang:1.14.4
|
||||
GO_BUILDER_IMAGE := golang:1.15.0
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr : _)
|
||||
BASE_IMAGE := local/base:1.1.1-$(shell echo $(ROOT_IMAGE) | tr : _)-$(shell echo $(CERTS_IMAGE) | tr : _)
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#### Docker compose
|
||||
|
||||
To spin-up setup of VictoriaMetrics, Prometheus and Grafana run following command:
|
||||
To spin up a setup of VictoriaMetrics, vmagent and Grafana, run the following command:
|
||||
|
||||
`docker-compose up`
|
||||
|
||||
@@ -13,11 +13,11 @@ VictoriaMetrics opens following ports:
|
||||
* `--opentsdbListenAddr=:4242`
|
||||
* `--httpListenAddr=:8428`
|
||||
|
||||
##### Prometheus
|
||||
##### vmagent
|
||||
|
||||
To access the service, open the following [link](http://localhost:8429).
|
||||
|
||||
Prometheus is already configured to use VictoriaMetrics as remote storage.
|
||||
vmagent is used for scraping and pushing timeseries to
|
||||
VictoriaMetrics instance. It accepts Prometheus-compatible
|
||||
configuration `prometheus.yml` with listed targets for scraping.
|
||||
|
||||
##### Grafana
|
||||
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
version: '3.5'
|
||||
services:
|
||||
prometheus:
|
||||
container_name: prometheus
|
||||
image: prom/prometheus:v2.19.1
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
- 9090:9090
|
||||
- 8429:8429
|
||||
volumes:
|
||||
- promdata:/prometheus
|
||||
- vmagentdata:/vmagentdata
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.path=/prometheus'
|
||||
- '--promscrape.config=/etc/prometheus/prometheus.yml'
|
||||
- '--remoteWrite.url=http://victoriametrics:8428/api/v1/write'
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
@@ -35,13 +35,7 @@ services:
|
||||
restart: always
|
||||
grafana:
|
||||
container_name: grafana
|
||||
image: grafana/grafana:7.0.3
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
cd /var/lib/grafana &&
|
||||
mkdir -p dashboards &&
|
||||
sed 's/$${DS_PROMETHEUS}/Prometheus/g' vm.json > dashboards/vm.json &&
|
||||
/run.sh"
|
||||
image: grafana/grafana:7.1.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -49,12 +43,13 @@ services:
|
||||
volumes:
|
||||
- grafanadata:/var/lib/grafana
|
||||
- ./provisioning/:/etc/grafana/provisioning/
|
||||
- ./../../dashboards/victoriametrics.json:/var/lib/grafana/vm.json
|
||||
- ./../../dashboards/victoriametrics.json:/var/lib/grafana/dashboards/vm.json
|
||||
- ./../../dashboards/vmagent.json:/var/lib/grafana/dashboards/vmagent.json
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
volumes:
|
||||
promdata: {}
|
||||
vmagentdata: {}
|
||||
vmdata: {}
|
||||
grafanadata: {}
|
||||
networks:
|
||||
|
||||
@@ -1,16 +1,10 @@
|
||||
global:
|
||||
scrape_interval: 10s
|
||||
evaluation_interval: 10s
|
||||
|
||||
remote_write:
|
||||
- url: "http://victoriametrics:8428/api/v1/write"
|
||||
queue_config:
|
||||
max_samples_per_send: 10000
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
- job_name: 'vmagent'
|
||||
static_configs:
|
||||
- targets: ['prometheus:9090']
|
||||
- targets: ['vmagent:8429']
|
||||
- job_name: 'victoriametrics'
|
||||
static_configs:
|
||||
- targets: ['victoriametrics:8428']
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: false
|
||||
isDefault: true
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
|
||||
* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
|
||||
* [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
|
||||
* [How to migrate data from Prometheus to VictoriaMetrics](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
|
||||
|
||||
|
||||
## Third-party articles and slides
|
||||
@@ -33,6 +34,7 @@
|
||||
* [Sismology: Iguana Solutions’ Monitoring System](https://medium.com/@IG1.com/sismology-iguana-solutions-monitoring-system-f46e4170447f)
|
||||
* [Monitoring K8S with VictoriaMetrics](https://docs.google.com/presentation/d/1g7yUyVEaAp4tPuRy-MZbPXKqJ1z78_5VKuV841aQfsg/edit)
|
||||
* [CMS monitoring R&D: Real-time monitoring and alerts](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
|
||||
* [The CMS monitoring infrastructure and applications](https://arxiv.org/pdf/2007.03630.pdf)
|
||||
* [Disk usage: VictoriaMetrics vs Prometheus](https://stas.starikevich.com/posts/disk-usage-for-vm-versus-prometheus/)
|
||||
* [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
|
||||
* [What are Open Source Time Series Databases?](https://www.iunera.com/kraken/fabric/time-series-database/)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user