Compare commits

..

3 Commits

Author SHA1 Message Date
Alexander Marshalov
5bc3488538 Merge branch 'master' into streaming-aggregation 2023-10-26 16:54:09 +02:00
Alexander Marshalov
1cd6232537 WIP 2023-10-23 13:14:48 +02:00
Alexander Marshalov
ed1bef0e2d WIP 2023-10-18 14:48:49 +02:00
953 changed files with 30004 additions and 20931 deletions

View File

@@ -17,7 +17,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.21.4
go-version: 1.21.3
id: go
- name: Code checkout
uses: actions/checkout@master

View File

@@ -57,7 +57,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: 1.21.4
go-version: 1.21.3
check-latest: true
cache: true
if: ${{ matrix.language == 'go' }}

View File

@@ -32,7 +32,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: 1.21.4
go-version: 1.21.3
check-latest: true
cache: true
@@ -56,7 +56,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: 1.21.4
go-version: 1.21.3
check-latest: true
cache: true
@@ -81,7 +81,7 @@ jobs:
id: go
uses: actions/setup-go@v4
with:
go-version: 1.21.4
go-version: 1.21.3
check-latest: true
cache: true

View File

@@ -8,7 +8,6 @@ on:
workflow_dispatch: {}
env:
PAGEFIND_VERSION: "1.0.3"
HUGO_VERSION: "latest"
permissions:
contents: read # This is required for actions/checkout and to commit back image update
deployments: write
@@ -29,7 +28,7 @@ jobs:
path: docs
- uses: peaceiris/actions-hugo@v2
with:
hugo-version: ${{env.HUGO_VERSION}}
hugo-version: 'latest'
extended: true
- name: Install PageFind #install the static search engine for index build
uses: supplypike/setup-bin@v3

View File

@@ -25,143 +25,143 @@ all: \
victoria-logs-prod \
vmagent-prod \
vmalert-prod \
vmalert-tool-prod \
vmauth-prod \
vmbackup-prod \
vmrestore-prod \
vmctl-prod
vmctl-prod \
vmalert-tool-prod
clean:
rm -rf bin/*
publish: \
publish: package-base \
publish-victoria-metrics \
publish-vmagent \
publish-vmalert \
publish-vmalert-tool \
publish-vmauth \
publish-vmbackup \
publish-vmrestore \
publish-vmctl
publish-vmctl \
publish-vmalert-tool
package: \
package-victoria-metrics \
package-victoria-logs \
package-vmagent \
package-vmalert \
package-vmalert-tool \
package-vmauth \
package-vmbackup \
package-vmrestore \
package-vmctl
package-vmctl \
package-vmalert-tool
vmutils: \
vmagent \
vmalert \
vmalert-tool \
vmauth \
vmbackup \
vmrestore \
vmctl
vmctl \
vmalert-tool
vmutils-pure: \
vmagent-pure \
vmalert-pure \
vmalert-tool-pure \
vmauth-pure \
vmbackup-pure \
vmrestore-pure \
vmctl-pure
vmctl-pure \
vmalert-tool-pure
vmutils-linux-amd64: \
vmagent-linux-amd64 \
vmalert-linux-amd64 \
vmalert-tool-linux-amd64 \
vmauth-linux-amd64 \
vmbackup-linux-amd64 \
vmrestore-linux-amd64 \
vmctl-linux-amd64
vmctl-linux-amd64 \
vmalert-tool-linux-amd64
vmutils-linux-arm64: \
vmagent-linux-arm64 \
vmalert-linux-arm64 \
vmalert-tool-linux-arm64 \
vmauth-linux-arm64 \
vmbackup-linux-arm64 \
vmrestore-linux-arm64 \
vmctl-linux-arm64
vmctl-linux-arm64 \
vmalert-tool-linux-arm64
vmutils-linux-arm: \
vmagent-linux-arm \
vmalert-linux-arm \
vmalert-tool-linux-arm \
vmauth-linux-arm \
vmbackup-linux-arm \
vmrestore-linux-arm \
vmctl-linux-arm
vmctl-linux-arm \
vmalert-tool-linux-arm
vmutils-linux-386: \
vmagent-linux-386 \
vmalert-linux-386 \
vmalert-tool-linux-386 \
vmauth-linux-386 \
vmbackup-linux-386 \
vmrestore-linux-386 \
vmctl-linux-386
vmctl-linux-386 \
vmalert-tool-linux-386
vmutils-linux-ppc64le: \
vmagent-linux-ppc64le \
vmalert-linux-ppc64le \
vmalert-tool-linux-ppc64le \
vmauth-linux-ppc64le \
vmbackup-linux-ppc64le \
vmrestore-linux-ppc64le \
vmctl-linux-ppc64le
vmctl-linux-ppc64le \
vmalert-tool-linux-ppc64le
vmutils-darwin-amd64: \
vmagent-darwin-amd64 \
vmalert-darwin-amd64 \
vmalert-tool-darwin-amd64 \
vmauth-darwin-amd64 \
vmbackup-darwin-amd64 \
vmrestore-darwin-amd64 \
vmctl-darwin-amd64
vmctl-darwin-amd64 \
vmalert-tool-darwin-amd64
vmutils-darwin-arm64: \
vmagent-darwin-arm64 \
vmalert-darwin-arm64 \
vmalert-tool-darwin-arm64 \
vmauth-darwin-arm64 \
vmbackup-darwin-arm64 \
vmrestore-darwin-arm64 \
vmctl-darwin-arm64
vmctl-darwin-arm64 \
vmalert-tool-darwin-arm64
vmutils-freebsd-amd64: \
vmagent-freebsd-amd64 \
vmalert-freebsd-amd64 \
vmalert-tool-freebsd-amd64 \
vmauth-freebsd-amd64 \
vmbackup-freebsd-amd64 \
vmrestore-freebsd-amd64 \
vmctl-freebsd-amd64
vmctl-freebsd-amd64 \
vmalert-tool-freebsd-amd64
vmutils-openbsd-amd64: \
vmagent-openbsd-amd64 \
vmalert-openbsd-amd64 \
vmalert-tool-openbsd-amd64 \
vmauth-openbsd-amd64 \
vmbackup-openbsd-amd64 \
vmrestore-openbsd-amd64 \
vmctl-openbsd-amd64
vmctl-openbsd-amd64 \
vmalert-tool-openbsd-amd64
vmutils-windows-amd64: \
vmagent-windows-amd64 \
vmalert-windows-amd64 \
vmalert-tool-windows-amd64 \
vmauth-windows-amd64 \
vmbackup-windows-amd64 \
vmrestore-windows-amd64 \
vmctl-windows-amd64
vmctl-windows-amd64 \
vmalert-tool-windows-amd64
victoria-metrics-crossbuild: \
victoria-metrics-linux-386 \
@@ -354,72 +354,72 @@ release-vmutils-windows-amd64:
release-vmutils-goos-goarch: \
vmagent-$(GOOS)-$(GOARCH)-prod \
vmalert-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
cd bin && \
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vmagent-$(GOOS)-$(GOARCH)-prod \
vmalert-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vmagent-$(GOOS)-$(GOARCH)-prod \
vmalert-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vmagent-$(GOOS)-$(GOARCH)-prod \
vmalert-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
release-vmutils-windows-goarch: \
vmagent-windows-$(GOARCH)-prod \
vmalert-windows-$(GOARCH)-prod \
vmalert-tool-windows-$(GOARCH)-prod \
vmauth-windows-$(GOARCH)-prod \
vmbackup-windows-$(GOARCH)-prod \
vmrestore-windows-$(GOARCH)-prod \
vmctl-windows-$(GOARCH)-prod
vmctl-windows-$(GOARCH)-prod \
vmalert-tool-windows-$(GOARCH)-prod
cd bin && \
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
vmagent-windows-$(GOARCH)-prod.exe \
vmalert-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
vmauth-windows-$(GOARCH)-prod.exe \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
vmagent-windows-$(GOARCH)-prod.exe \
vmalert-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
vmauth-windows-$(GOARCH)-prod.exe \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vmagent-windows-$(GOARCH)-prod.exe \
vmalert-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
vmauth-windows-$(GOARCH)-prod.exe \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
@@ -486,7 +486,7 @@ golangci-lint: install-golangci-lint
golangci-lint run
install-golangci-lint:
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.55.1
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.54.2
govulncheck: install-govulncheck
govulncheck ./...

133
README.md
View File

@@ -338,8 +338,7 @@ which can be used as faster and less resource-hungry alternative to Prometheus.
## Grafana setup
Create [Prometheus datasource](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/)
in Grafana with the following url:
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following url:
```url
http://<victoriametrics-addr>:8428
@@ -353,9 +352,6 @@ or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
Alternatively, use VictoriaMetrics [datasource plugin](https://github.com/VictoriaMetrics/grafana-datasource) with support of extra features.
See more in [description](https://github.com/VictoriaMetrics/grafana-datasource#victoriametrics-data-source-for-grafana).
Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
If you don't see an option to create a data source, try contacting your system administrator.
## How to upgrade VictoriaMetrics
VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check [the CHANGELOG page](https://docs.victoriametrics.com/CHANGELOG.html) and perform regular upgrades.
@@ -446,7 +442,7 @@ This information is obtained from the `/api/v1/status/active_queries` HTTP endpo
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
1. Click the `Explore Prometheus metrics` tab.
1. Click the `Explore metrics` tab.
1. Select the `job` you want to explore.
1. Optionally select the `instance` for the selected job to explore.
1. Select metrics you want to explore and compare.
@@ -1130,18 +1126,6 @@ For example, the following command builds the image on top of [scratch](https://
ROOT_IMAGE=scratch make package-victoria-metrics
```
#### Building VictoriaMetrics with Podman
VictoriaMetrics can be built with Podman in either rootful or rootless mode.
When building via rootful Podman, simply add `DOCKER=podman` to the relevant `make` command line. To build
via rootless Podman, add `DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"` to the `make`
command line.
For example: `make victoria-metrics-pure DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"`
Note that `production` builds are not supported via Podman because Podman does not support `buildx`.
## Start with docker-compose
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
@@ -1168,15 +1152,6 @@ Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-s
is the command-line flag value. Snapshots can be archived to backup storage at any time
with [vmbackup](https://docs.victoriametrics.com/vmbackup.html).
Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`.
See [this article](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
for more details. This adds some restrictions on what can be done with the contents of `<-storageDataPath>/snapshots` directory:
- Do not delete subdirectories inside `<-storageDataPath>/snapshots` with `rm` or similar commands, since this will leave some snapshot data undeleted.
Prefer using the `/snapshot/delete` API for deleting snapshots. See below for more details about this API.
- Do not copy subdirectories inside `<-storageDataPath>/snapshots` with `cp`, `rsync` or similar commands, since there is a high chance
that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data.
The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
@@ -1699,44 +1674,43 @@ See also [cardinality limiter](#cardinality-limiter) and [capacity planning docs
## High availability
The general approach for achieving high availability is the following:
- to run two identically configured VictoriaMetrics instances in distinct datacenters (availability zones)
- to store the collected data simultaneously into these instances via [vmagent](https://docs.victoriametrics.com/vmagent.html) or Prometheus
- to query the first VictoriaMetrics instance and to fail over to the second instance when the first instance becomes temporarily unavailable.
Such a setup guarantees that the collected data isn't lost when one of the VictoriaMetrics instances becomes unavailable.
The collected data continues to be written to the available VictoriaMetrics instance, so it should be available for querying.
Both [vmagent](https://docs.victoriametrics.com/vmagent.html) and Prometheus buffer the collected data locally if they cannot send it
to the configured remote storage. So the collected data will be written to the temporarily unavailable VictoriaMetrics instance
after it becomes available.
If you use [vmagent](https://docs.victoriametrics.com/vmagent.html) for storing the data into VictoriaMetrics,
then it can be configured with multiple `-remoteWrite.url` command-line flags, where every flag points to the VictoriaMetrics
instance in a particular availability zone, in order to replicate the collected data to all the VictoriaMetrics instances.
For example, the following command instructs `vmagent` to replicate data to `vm-az1` and `vm-az2` instances of VictoriaMetrics:
* Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
* Pass addresses of these instances to [vmagent](https://docs.victoriametrics.com/vmagent.html) via `-remoteWrite.url` command-line flag:
```console
/path/to/vmagent \
-remoteWrite.url=http://<vm-az1>:8428/api/v1/write \
-remoteWrite.url=http://<vm-az2>:8428/api/v1/write
/path/to/vmagent -remoteWrite.url=http://<victoriametrics-addr-1>:8428/api/v1/write -remoteWrite.url=http://<victoriametrics-addr-2>:8428/api/v1/write
```
If you use Prometheus for collecting and writing the data to VictoriaMetrics,
then the following [`remote_write`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section
in Prometheus config can be used for replicating the collected data to `vm-az1` and `vm-az2` VictoriaMetrics instances:
Alternatively these addresses may be passed to `remote_write` section in Prometheus config:
```yml
remote_write:
- url: http://<vm-az1>:8428/api/v1/write
- url: http://<vm-az2>:8428/api/v1/write
- url: http://<victoriametrics-addr-1>:8428/api/v1/write
queue_config:
max_samples_per_send: 10000
# ...
- url: http://<victoriametrics-addr-N>:8428/api/v1/write
queue_config:
max_samples_per_send: 10000
```
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups,
since it uses lower amounts of RAM, CPU and network bandwidth than Prometheus.
* Apply the updated config:
If you use identically configured [vmagent](https://docs.victoriametrics.com/vmagent.html) instances for collecting the same data
and sending it to VictoriaMetrics, then do not forget to enable [deduplication](#deduplication) on the VictoriaMetrics side.
```console
kill -HUP `pidof prometheus`
```
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups.
* Now Prometheus should write data into all the configured `remote_write` urls in parallel.
* Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
* Set up Prometheus datasource in Grafana that points to Promxy.
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
with the enabled de-duplication. See [this section](#deduplication) for details.
## Deduplication
@@ -1923,23 +1897,8 @@ See how to request a free trial license [here](https://victoriametrics.com/produ
* `-downsampling.period=30d:5m,180d:1h` instructs VictoriaMetrics to deduplicate samples older than 30 days with 5 minutes interval and to deduplicate samples older than 180 days with 1 hour interval.
Downsampling is applied independently per each time series and leaves a single [raw sample](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
with the biggest [timestamp](https://en.wikipedia.org/wiki/Unix_time) on the configured interval, in the same way as [deduplication](#deduplication) does.
It works the best for [counters](https://docs.victoriametrics.com/keyConcepts.html#counter) and [histograms](https://docs.victoriametrics.com/keyConcepts.html#histogram),
as their values are always increasing. But downsampling [gauges](https://docs.victoriametrics.com/keyConcepts.html#gauge)
and [summaries](https://docs.victoriametrics.com/keyConcepts.html#summary)
would mean losing the changes within the downsampling interval. Please note, you can use [recording rules](https://docs.victoriametrics.com/vmalert.html#rules)
or [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html)
to apply custom aggregation functions, like min/max/avg etc., in order to make gauges more resilient to downsampling.
Downsampling can reduce disk space usage and improve query performance if it is applied to time series with big number
of samples per each series. The downsampling doesn't improve query performance if the database contains big number
of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)),
since downsampling doesn't reduce the number of time series. In this case the majority of query time is spent on searching for the matching time series
instead of processing the found samples.
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in [vmagent](https://docs.victoriametrics.com/vmagent.html)
or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to
[reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
Downsampling is applied independently per each time series. It can reduce disk space usage and improve query performance if it is applied to time series with big number of samples per each series. The downsampling doesn't improve query performance if the database contains big number of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)), since downsampling doesn't reduce the number of time series. So the majority of time is spent on searching for the matching time series.
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in vmagent or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to [reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
Downsampling happens during [background merges](https://docs.victoriametrics.com/#storage)
and can't be performed if there is not enough of free disk space or if vmstorage
@@ -1997,8 +1956,6 @@ VictoriaMetrics provides the following security-related command-line flags:
* `-flagsAuthKey` for protecting `/flags` endpoint.
* `-pprofAuthKey` for protecting `/debug/pprof/*` endpoints, which can be used for [profiling](#profiling).
* `-denyQueryTracing` for disallowing [query tracing](#query-tracing).
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
and `X-Frame-Options` HTTP response headers.
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
@@ -2530,8 +2487,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
```
-bigMergeConcurrency int
Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
-blockcache.missesBeforeCaching int
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
-cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-configAuthKey string
@@ -2552,7 +2507,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-denyQueryTracing
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
-downsampling.period array
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-dryRun
Whether to check config files without running VictoriaMetrics. The following config files are checked: -promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
@@ -2586,12 +2541,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration
@@ -2659,8 +2608,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerMaxArgLen int
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-loggerTimezone string
@@ -2683,9 +2630,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-newrelic.maxInsertRequestSize size
The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentsdbHTTPListenAddr string
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
-opentsdbHTTPListenAddr.useProxyProtocol
@@ -2726,7 +2670,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-promscrape.config.strictParse
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
-promscrape.configCheckInterval duration
Interval for checking for changes in -promscrape.config file. By default, the checking is disabled. See how to reload -promscrape.config file at https://docs.victoriametrics.com/vmagent.html#configuration-update
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
-promscrape.consul.waitTime duration
Wait time used by Consul service discovery. Default value is used if not set
-promscrape.consulSDCheckInterval duration
@@ -2809,11 +2753,11 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-relabelConfig string
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
-retentionFilter array
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-retentionPeriod value
Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
-retentionTimezoneOffset duration
The offset for performing indexdb rotation. If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)
-search.cacheTimestampOffset duration
@@ -2829,7 +2773,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-search.latencyOffset duration
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
-search.logQueryMemoryUsage size
Log query and increment vm_memory_intensive_queries_total metric each time the query requires more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-search.logSlowQueryDuration duration
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
@@ -2891,9 +2835,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
-search.minStalenessInterval duration
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
-search.minWindowForInstantRollupOptimization value
Enable cache-based optimization for repeated queries to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 6h)
-search.noStaleMarkers
Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
-search.queryStats.lastQueriesCount int
@@ -2920,7 +2861,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period
-snapshotsMaxAge value
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size

View File

@@ -117,6 +117,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"expand-with-exprs", "WITH expressions' tutorial"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"stream-agg", "streaming aggregation status"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"api/v1/status/tsdb", "tsdb status page"},

View File

@@ -120,10 +120,10 @@ func compressData(s string) string {
var bb bytes.Buffer
zw := gzip.NewWriter(&bb)
if _, err := zw.Write([]byte(s)); err != nil {
panic(fmt.Errorf("unexpected error when compressing data: %w", err))
panic(fmt.Errorf("unexpected error when compressing data: %s", err))
}
if err := zw.Close(); err != nil {
panic(fmt.Errorf("unexpected error when closing gzip writer: %w", err))
panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
}
return bb.String()
}

View File

@@ -43,7 +43,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
r.Reset(dataBytes)
_, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
panic(fmt.Errorf("unexpected error: %s", err))
}
}
})

View File

@@ -29,7 +29,7 @@ func benchmarkParseJSONRequest(b *testing.B, streams, rows, labels int) {
for pb.Next() {
_, err := parseJSONRequest(data, func(timestamp int64, fields []logstorage.Field) {})
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
panic(fmt.Errorf("unexpected error: %s", err))
}
}
})

View File

@@ -84,7 +84,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
err = req.Unmarshal(bb.B)
if err != nil {
return 0, fmt.Errorf("cannot parse request body: %w", err)
return 0, fmt.Errorf("cannot parse request body: %s", err)
}
var commonFields []logstorage.Field
@@ -97,7 +97,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
// Labels are same for all entries in the stream.
commonFields, err = parsePromLabels(commonFields[:0], stream.Labels)
if err != nil {
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %w", stream.Labels, err)
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %s", stream.Labels, err)
}
fields := commonFields

View File

@@ -31,7 +31,7 @@ func benchmarkParseProtobufRequest(b *testing.B, streams, rows, labels int) {
for pb.Next() {
_, err := parseProtobufRequest(body, func(timestamp int64, fields []logstorage.Field) {})
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
panic(fmt.Errorf("unexpected error: %s", err))
}
}
})

View File

@@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.d1313636.css",
"main.js": "./static/js/main.1919fefe.js",
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
"main.css": "./static/css/main.9a224445.css",
"main.js": "./static/js/main.02178f4b.js",
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.d1313636.css",
"static/js/main.1919fefe.js"
"static/css/main.9a224445.css",
"static/js/main.02178f4b.js"
]
}

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.1919fefe.js"></script><link href="./static/css/main.d1313636.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.02178f4b.js"></script><link href="./static/css/main.9a224445.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -7,7 +7,7 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @remix-run/router v1.10.0
* @remix-run/router v1.7.2
*
* Copyright (c) Remix Software Inc.
*
@@ -18,7 +18,7 @@
*/
/**
* React Router DOM v6.17.0
* React Router DOM v6.14.2
*
* Copyright (c) Remix Software Inc.
*
@@ -29,7 +29,7 @@
*/
/**
* React Router v6.17.0
* React Router v6.14.2
*
* Copyright (c) Remix Software Inc.
*

File diff suppressed because one or more lines are too long

View File

@@ -1,11 +1,11 @@
---
sort: 23
weight: 23
sort: 14
weight: 14
title: MetricsQL
menu:
docs:
parent: 'victoriametrics'
weight: 23
parent: "victoriametrics"
weight: 14
aliases:
- /ExtendedPromQL.html
- /MetricsQL.html
@@ -21,8 +21,7 @@ However, there are some [intentional differences](https://medium.com/@romanhavro
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
and introduction into [basic querying via MetricsQL](https://docs.victoriametrics.com/keyConcepts.html#metricsql).
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
The following functionality is implemented differently in MetricsQL compared to PromQL. This improves user experience:
@@ -110,7 +109,7 @@ The list of MetricsQL features on top of PromQL:
* [histogram_quantile](#histogram_quantile) accepts optional third arg - `boundsLabel`.
In this case it returns `lower` and `upper` bounds for the estimated percentile.
See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`. See also [drop_empty_series](#drop_empty_series).
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`.
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
* `WITH` templates. This feature simplifies writing and managing complex queries.
@@ -532,7 +531,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).
See also [mad](#mad) and [range_mad](#range_mad).
#### max_over_time
@@ -562,18 +561,6 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.
#### outlier_iqr_over_time
`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
if its value is either smaller than the `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.
The `outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last hour.
See also [outliers_iqr](#outliers_iqr).
#### predict_linear
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
@@ -878,7 +865,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
### Transform functions
@@ -1068,17 +1055,6 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
This function is supported by PromQL. See also [rad](#rad).
#### drop_empty_series
`drop_empty_series(q)` is a [transform function](#transform-functions), which drops empty series from `q`.
This function can be used when `default` operator should be applied only to non-empty series. For example,
`drop_empty_series(temperature < 30) default 42` returns series, which have at least a single sample smaller than 30 on the selected time range,
while filling gaps in the returned series with 42.
On the other hand `(temperature < 30) default 40` returns all the `temperature` series, even if they have no samples smaller than 30,
by replacing all the values bigger or equal to 30 with 40.
#### end
`end()` is a [transform function](#transform-functions), which returns the unix timestamp in seconds for the last point.
@@ -1615,7 +1591,7 @@ which maps `label` values from `src_*` to `dst*` for all the time series returne
which drops time series from `q` with `label` not matching the given `regexp`.
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
See also [label_mismatch](#label_mismatch).
#### label_mismatch
@@ -1623,7 +1599,7 @@ See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
which drops time series from `q` with `label` matching the given `regexp`.
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
See also [label_match](#label_match) and [labels_equal](#labels_equal).
See also [label_match](#label_match).
#### label_move
@@ -1666,30 +1642,23 @@ for the given `label` for every time series returned by `q`.
For example, if `label_value(foo, "bar")` is applied to `foo{bar="1.234"}`, then it will return a time series
`foo{bar="1.234"}` with `1.234` value. Function will return no data for non-numeric label values.
#### labels_equal
`labels_equal(q, "label1", "label2", ...)` is [label manipulation function](#label-manipulation-functions), which returns `q` series with identical values for the listed labels
"label1", "label2", etc.
See also [label_match](#label_match) and [label_mismatch](#label_mismatch).
#### sort_by_label
`sort_by_label(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
`sort_by_label(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
See also [sort_by_label_desc](#sort_by_label_desc) and [sort_by_label_numeric](#sort_by_label_numeric).
#### sort_by_label_desc
`sort_by_label_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
`sort_by_label_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
For example, `sort_by_label_desc(foo, "bar")` would sort `foo` series in descending order by values of the label `bar` in these series.
See also [sort_by_label](#sort_by_label) and [sort_by_label_numeric_desc](#sort_by_label_numeric_desc).
#### sort_by_label_numeric
`sort_by_label_numeric(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
`sort_by_label_numeric(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series
in the following order of `bar` label values: `1`, `2`, `15` and `101`.
@@ -1698,7 +1667,7 @@ See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_
#### sort_by_label_numeric_desc
`sort_by_label_numeric_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
`sort_by_label_numeric_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric_desc(foo, "bar")`
would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
@@ -1870,33 +1839,20 @@ This function is supported by PromQL.
`mode(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
#### outliers_iqr
`outliers_iqr(q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
outside e.g. [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
comparing to other time series at the given point, where:
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.
The `outliers_iqr()` is useful for detecting anomalous series in the group of series. For example, `outliers_iqr(temperature) by (country)` returns
per-country series with anomalous outlier values comparing to the rest of per-country series.
See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
#### outliers_mad
`outliers_mad(tolerance, q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
E.g. it returns time series with at least a single point below `median(q) - tolerance*mad(q)` or a single point above `median(q) + tolerance*mad(q)`.
See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).
See also [outliersk](#outliersk) and [mad](#mad).
#### outliersk
`outliersk(k, q)` is [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
out of time series returned by `q`.
See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
See also [outliers_mad](#outliers_mad).
#### quantile
@@ -2016,7 +1972,7 @@ See also [bottomk_min](#bottomk_min).
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
This function is useful for detecting anomalies in the group of related time series.
See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
## Subqueries

View File

@@ -60,7 +60,7 @@ and sending the data to the Prometheus-compatible remote storage:
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vmagent)
to [single-node VictoriaMetrics](https://docs.victoriametrics.com/) located at `victoria-metrics-host:8428`:
```bash
```console
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
@@ -69,7 +69,7 @@ the data to [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-V
Example command for scraping Prometheus targets and writing the data to single-node VictoriaMetrics:
```bash
```console
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
@@ -110,7 +110,7 @@ additionally to pull-based Prometheus-compatible targets' scraping:
* Sending `SIGHUP` signal to `vmagent` process:
```bash
```console
kill -SIGHUP `pidof vmagent`
```
@@ -173,13 +173,6 @@ by routing outgoing samples for the same time series of [counter](https://docs.v
and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) types from top-level `vmagent` instances
to the same second-level `vmagent` instance, so they are aggregated properly.
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURLLabels`
command-line flag. For example, `-remoteWrite.shardByURLLabels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`.
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
### Relabeling and filtering
@@ -325,7 +318,7 @@ in the `scrape_config_files` section of `-promscrape.config` file. For example,
loading scrape configs from all the `*.yml` files under `configs` directory, from `single_scrape_config.yml` local file
and from `https://config-server/scrape_config.yml` url:
```yaml
```yml
scrape_config_files:
- configs/*.yml
- single_scrape_config.yml
@@ -335,7 +328,7 @@ scrape_config_files:
Every referred file can contain arbitrary number of [supported scrape configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
There is no need to specify a top-level `scrape_configs` section in these files. For example:
```yaml
```yml
- job_name: foo
static_configs:
- targets: ["vmagent:8429"]
@@ -375,7 +368,7 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
For example, the following command starts `vmagent`, which adds `{datacenter="foobar"}` label to all the metrics pushed
to all the configured remote storage systems (all the `-remoteWrite.url` flag values):
```bash
```
/path/to/vmagent -remoteWrite.label=datacenter=foobar ...
```
@@ -503,16 +496,13 @@ with [additional enhancements](#relabeling-enhancements). The relabeling can be
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
* At the `-remoteWrite.urlRelabelConfig` files. This relabeling is used for modifying labels for metrics
and for dropping unneeded metrics before sending them to the particular `-remoteWrite.url`.
and for dropping unneeded metrics before sending them to a particular `-remoteWrite.url`.
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
All the files with relabeling configs can contain special placeholders in the form `%{ENV_VAR}`,
which are replaced by the corresponding environment variable values.
[Streaming aggregation](https://docs.victoriametrics.com/stream-aggregation.html), if configured,
is performed after applying all the relabeling stages mentioned above.
The following articles contain useful information about Prometheus relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html)
@@ -743,7 +733,7 @@ stream parsing mode can be explicitly enabled in the following places:
Examples:
```yaml
```yml
scrape_configs:
- job_name: 'big-federate'
stream_parse: true
@@ -770,7 +760,7 @@ Each `vmagent` instance in the cluster must use identical `-promscrape.config` f
in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster specified via `-promscrape.cluster.membersCount`.
For example, the following commands spread scrape targets among a cluster of two `vmagent` instances:
```text
```
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
```
@@ -782,7 +772,7 @@ By default, each scrape target is scraped only by a single `vmagent` instance in
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
```text
```
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
@@ -796,7 +786,7 @@ The `-promscrape.cluster.memberLabel` command-line flag allows specifying a name
The value of the `member num` label is set to `-promscrape.cluster.memberNum`. For example, the following config instructs adding `vmagent_instance="0"` label
to all the metrics scraped by the given `vmagent` instance:
```text
```
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.cluster.memberLabel=vmagent_instance
```
@@ -823,7 +813,7 @@ See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679)
`vmagent` supports scraping targets via http, https and socks5 proxies. Proxy address must be specified in `proxy_url` option. For example, the following scrape config instructs
target scraping via https proxy at `https://proxy-addr:1234`:
```yaml
```yml
scrape_configs:
- job_name: foo
proxy_url: https://proxy-addr:1234
@@ -840,7 +830,7 @@ Proxy can be configured with the following optional settings:
For example:
```yaml
```yml
scrape_configs:
- job_name: foo
proxy_url: https://proxy-addr:1234
@@ -990,7 +980,7 @@ If you have suggestions for improvements or have found a bug - please open an is
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at the beginning of some interval,
then `scrape_align_interval` option must be used. For example, the following config aligns hourly scrapes to the beginning of hour:
```yaml
```yml
scrape_configs:
- job_name: foo
scrape_interval: 1h
@@ -1000,7 +990,7 @@ If you have suggestions for improvements or have found a bug - please open an is
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at specific offset, then `scrape_offset` option must be used.
For example, the following config instructs `vmagent` to scrape the target at 10 seconds of every minute:
```yaml
```yml
scrape_configs:
- job_name: foo
scrape_interval: 1m
@@ -1013,14 +1003,14 @@ If you have suggestions for improvements or have found a bug - please open an is
The following relabeling rule may be added to `relabel_configs` section in order to filter out pods with unneeded ports:
```yaml
```yml
- action: keep_if_equal
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
```
The following relabeling rule may be added to `relabel_configs` section in order to filter out init container pods:
```yaml
```yml
- action: drop
source_labels: [__meta_kubernetes_pod_container_init]
regex: true
@@ -1064,7 +1054,7 @@ For example, `-kafka.consumer.topic.brokers=host1:9092;host2:9092`.
The following command starts `vmagent`, which reads metrics in InfluxDB line protocol format from Kafka broker at `localhost:9092`
from the topic `metrics-by-telegraf` and sends them to remote storage at `http://localhost:8428/api/v1/write`:
```bash
```console
./bin/vmagent -remoteWrite.url=http://localhost:8428/api/v1/write \
-kafka.consumer.topic.brokers=localhost:9092 \
-kafka.consumer.topic.format=influx \
@@ -1087,7 +1077,7 @@ These command-line flags are available only in [enterprise](https://docs.victori
which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) page
(see `vmutils-...-enterprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
```text
```
-kafka.consumer.topic array
Kafka topic names for data consumption.
Supports an array of values separated by comma or specified via multiple flags.
@@ -1132,13 +1122,13 @@ Two types of auth are supported:
* sasl with username and password:
```bash
```console
./bin/vmagent -remoteWrite.url=kafka://localhost:9092/?topic=prom-rw&security.protocol=SASL_SSL&sasl.mechanisms=PLAIN -remoteWrite.basicAuth.username=user -remoteWrite.basicAuth.password=password
```
* tls certificates:
```bash
```console
./bin/vmagent -remoteWrite.url=kafka://localhost:9092/?topic=prom-rw&security.protocol=SSL -remoteWrite.tlsCAFile=/opt/ca.pem -remoteWrite.tlsCertFile=/opt/cert.pem -remoteWrite.tlsKeyFile=/opt/key.pem
```
@@ -1169,7 +1159,7 @@ The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
```bash
```console
ROOT_IMAGE=scratch make package-vmagent
```
@@ -1197,7 +1187,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
<div class="with-copy" markdown="1">
```bash
```console
curl http://0.0.0.0:8429/debug/pprof/heap > mem.pprof
```
@@ -1207,7 +1197,7 @@ curl http://0.0.0.0:8429/debug/pprof/heap > mem.pprof
<div class="with-copy" markdown="1">
```bash
```console
curl http://0.0.0.0:8429/debug/pprof/profile > cpu.pprof
```
@@ -1223,7 +1213,7 @@ It is safe sharing the collected profiles from security point of view, since the
`vmagent` can be fine-tuned with various command-line flags. Run `./vmagent -help` in order to see the full list of these flags with their descriptions and default values:
```text
```
./vmagent -help
vmagent collects metrics data via popular data ingestion protocols and routes them to VictoriaMetrics.
@@ -1232,6 +1222,10 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-clients.docker
Decides whether a docker container be brought up automatically
-clients.semaphore
Tells if the job is running on Semaphore
-configAuthKey string
Authorization key for accessing /config page. It must be passed via authKey query arg
-csvTrimTimestamp duration
@@ -1269,12 +1263,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration
@@ -1385,9 +1373,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-newrelic.maxInsertRequestSize size
The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentsdbHTTPListenAddr string
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
-opentsdbHTTPListenAddr.useProxyProtocol
@@ -1426,7 +1411,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-promscrape.config.strictParse
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
-promscrape.configCheckInterval duration
Interval for checking for changes in -promscrape.config file. By default, the checking is disabled. See how to reload -promscrape.config file at https://docs.victoriametrics.com/vmagent.html#configuration-update
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
-promscrape.consul.waitTime duration
Wait time used by Consul service discovery. Default value is used if not set
-promscrape.consulSDCheckInterval duration
@@ -1610,9 +1595,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.shardByURL
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
-remoteWrite.shardByURL.labels array
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.showURL
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-remoteWrite.significantFigures array

View File

@@ -42,6 +42,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
)
var (
@@ -228,6 +229,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"stream-agg", "streaming aggregation status"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -432,6 +434,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
procutil.SelfSIGHUP()
w.WriteHeader(http.StatusOK)
return true
case "/stream-agg":
streamaggr.WriteHumanReadableState(w, r, remotewrite.GetAggregators())
return true
case "/ready":
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)

View File

@@ -106,15 +106,12 @@ type client struct {
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
authCfg, err := getAuthConfig(argIdx)
if err != nil {
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
}
tlsCfg, err := authCfg.NewTLSConfig()
if err != nil {
logger.Fatalf("cannot initialize tls config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
}
tlsCfg := authCfg.NewTLSConfig()
awsCfg, err := getAWSAPIConfig(argIdx)
if err != nil {
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
}
tr := &http.Transport{
DialContext: statDial,
@@ -331,25 +328,15 @@ func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
return nil, err
}
resp, err := c.hc.Do(req)
if err == nil {
return resp, nil
if err != nil && errors.Is(err, io.EOF) {
// it is likely connection became stale.
// So we do one more attempt in hope request will succeed.
// If not, the error should be handled by the caller as usual.
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
req, _ = c.newRequest(url, body)
resp, err = c.hc.Do(req)
}
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
return nil, err
}
// It is likely connection became stale or timed out during the first request.
// Make another attempt in hope request will succeed.
// If not, the error should be handled by the caller as usual.
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
req, err = c.newRequest(url, body)
if err != nil {
return nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.hc.Do(req)
if err != nil {
return nil, fmt.Errorf("second attempt: %w", err)
}
return resp, nil
return resp, err
}
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
@@ -375,7 +362,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
if c.awsCfg != nil {
sigv4Hash := awsapi.HashHex(body)
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
// there is no need in retry, request will be rejected by client.Do and retried by code below
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
}
}
return req, nil

View File

@@ -6,6 +6,7 @@ import (
"net/url"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
@@ -23,7 +24,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
@@ -41,9 +41,8 @@ var (
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
"even distribution of series over the specified -remoteWrite.url systems")
shardByURLLabels = flag.String("remoteWrite.shardByURL.labels", "", "Comma-separated list of label names for sharding across all the -remoteWrite.url. All labels of timeseries are used by default. "+
"See also -remoteWrite.shardByURL and https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
"See also -remoteWrite.maxDiskUsagePerURL")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
@@ -96,6 +95,8 @@ var (
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
defaultAuthToken = &auth.Token{}
shardLabelsFilter map[string]struct{}
)
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
@@ -120,8 +121,6 @@ func InitSecretFlags() {
}
}
var shardByURLLabelsMap map[string]struct{}
// Init initializes remotewrite.
//
// It must be called after flag.Parse().
@@ -158,13 +157,6 @@ func Init() {
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 {
m := make(map[string]struct{}, len(*shardByURLLabels))
for _, label := range *shardByURLLabels {
m[label] = struct{}{}
}
shardByURLLabelsMap = m
}
initLabelsGlobal()
// Register SIGHUP handler for config reload before loadRelabelConfigs.
@@ -184,6 +176,12 @@ func Init() {
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
}
if *shardByURLLabels != "" {
for _, label := range strings.Split(*shardByURLLabels, ",") {
shardLabelsFilter[strings.TrimSpace(label)] = struct{}{}
}
}
// Start config reloader.
configReloaderWG.Add(1)
go func() {
@@ -431,23 +429,11 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
if *shardByURL {
// Shard the data among rwctxs
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
tmpLabels := promutils.GetLabels()
for _, ts := range tssBlock {
hashLabels := ts.Labels
if len(shardByURLLabelsMap) > 0 {
hashLabels = tmpLabels.Labels[:0]
for _, label := range ts.Labels {
if _, ok := shardByURLLabelsMap[label.Name]; ok {
hashLabels = append(hashLabels, label)
}
}
}
h := getLabelsHash(hashLabels)
h := getLabelsHash(ts.Labels, shardLabelsFilter)
idx := h % uint64(len(tssByURL))
tssByURL[idx] = append(tssByURL[idx], ts)
}
promutils.PutLabels(tmpLabels)
// Push sharded data to remote storages in parallel in order to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
@@ -497,7 +483,7 @@ func limitSeriesCardinality(tss []prompbmarshal.TimeSeries) []prompbmarshal.Time
dst := make([]prompbmarshal.TimeSeries, 0, len(tss))
for i := range tss {
labels := tss[i].Labels
h := getLabelsHash(labels)
h := getLabelsHash(labels, nil)
if hourlySeriesLimiter != nil && !hourlySeriesLimiter.Add(h) {
hourlySeriesLimitRowsDropped.Add(len(tss[i].Samples))
logSkippedSeries(labels, "-remoteWrite.maxHourlySeries", hourlySeriesLimiter.MaxItems())
@@ -521,10 +507,16 @@ var (
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
)
func getLabelsHash(labels []prompbmarshal.Label) uint64 {
func getLabelsHash(labels []prompbmarshal.Label, filterLabels map[string]struct{}) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
if len(filterLabels) > 0 {
_, ok := filterLabels[label.Name]
if !ok {
continue
}
}
b = append(b, label.Name...)
b = append(b, label.Value...)
}
@@ -827,3 +819,23 @@ func CheckStreamAggrConfigs() error {
}
return nil
}
// GetAggregators returns the stream aggregators currently loaded for every
// remote write context, keyed by a human-readable description of that context.
//
// When remoteWriteMultitenantURLs is non-empty, the per-tenant contexts stored
// in rwctxsMap are listed while holding rwctxsMapLock; otherwise the contexts
// from rwctxsDefault are listed.
func GetAggregators() map[string]*streamaggr.Aggregators {
	aggrs := make(map[string]*streamaggr.Aggregators)

	// Single-tenant mode: only the default remote write contexts exist.
	if len(*remoteWriteMultitenantURLs) == 0 {
		for i, rwctx := range rwctxsDefault {
			aggrs[fmt.Sprintf("remote write %d", i)] = rwctx.sas.Load()
		}
		return aggrs
	}

	// Multitenant mode: rwctxsMap is guarded by rwctxsMapLock.
	rwctxsMapLock.Lock()
	for tenant, rwctxs := range rwctxsMap {
		for i, rwctx := range rwctxs {
			key := fmt.Sprintf("rw %d for tenant %v:%v", i, tenant.AccountID, tenant.ProjectID)
			aggrs[key] = rwctx.sas.Load()
		}
	}
	rwctxsMapLock.Unlock()

	return aggrs
}

View File

@@ -1,12 +0,0 @@
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
# Multi-stage build: the first stage only supplies a CA certificate bundle;
# the final image contains that bundle plus the vmalert-tool binary copied
# from the build context. Both base images are passed in as build arguments.
ARG certs_image
ARG root_image
# Stage "certs": upgrade packages and install ca-certificates.
FROM $certs_image as certs
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
# Final stage: start from the root image and copy the CA bundle out of the "certs" stage.
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
EXPOSE 8429
# NOTE(review): ENTRYPOINT uses an absolute path while the COPY below uses a relative
# destination - this assumes the working directory of root_image is "/"; confirm.
ENTRYPOINT ["/vmalert-tool-prod"]
# TARGETARCH selects which per-architecture binary is copied into the image.
ARG TARGETARCH
COPY vmalert-tool-linux-${TARGETARCH}-prod ./vmalert-tool-prod

View File

@@ -119,13 +119,6 @@ name: <string>
# `eval_offset` can't be bigger than `interval`.
[ eval_offset: <duration> ]
# Optional
# Adjust the `time` parameter of group evaluation requests to compensate intentional query delay from the datasource.
# By default, the value is inherited from the `-rule.evalDelay` cmd-line flag - see its description for details.
# If group has `latency_offset` set in `params`, then it is recommended to set `eval_delay` equal to `latency_offset`.
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 and https://docs.victoriametrics.com/keyConcepts.html#query-latency.
[ eval_delay: <duration> ]
# Limit the number of alerts an alerting rule and series a recording
# rule can produce. 0 is no limit.
[ limit: <int> | default = 0 ]
@@ -801,7 +794,9 @@ Try the following recommendations to reduce the chance of hitting the data delay
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution). For example,
if expression is `rate(my_metric[2m]) > 0` then ensure that `my_metric` resolution is at least `1m` or better `30s`.
If you use VictoriaMetrics as datasource, `[duration]` can be omitted and VictoriaMetrics will adjust it automatically.
* Extend `[duration]` in expr to help tolerate the delay. For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
* If you know in advance, that data in datasource is delayed - try changing vmalerts `-datasource.lookback`
command-line flag to add a time shift for evaluations. Or extend `[duration]` to tolerate the delay.
For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
* If [time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution)
in datasource is inconsistent or `>=5min` - try changing vmalerts `-datasource.queryStep` command-line flag to specify
how far search query can lookback for the recent datapoint. The recommendation is to have the step
@@ -809,12 +804,9 @@ at least two times bigger than the resolution.
> Please note, data delay is inevitable in distributed systems. And it is better to account for it instead of ignoring.
By default, recently written samples to VictoriaMetrics [aren't visible for queries](https://docs.victoriametrics.com/keyConcepts.html#query-latency)
for up to 30s (see `-search.latencyOffset` command-line flag at vmselect or VictoriaMetrics single-node). Such delay is needed to eliminate risk of
incomplete data on the moment of querying, due to chance that metrics collectors won't be able to deliver that data in time.
To compensate the latency in timestamps for produced evaluation results, `-rule.evalDelay` is also set to `30s` by default.
If you expect data to be delayed for longer intervals (it gets buffered, queued, or just network is slow sometimes)
- consider increasing the `-rule.evalDelay` value accordingly.
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s
(see `-search.latencyOffset` command-line flag at vmselect). Such delay is needed to eliminate risk of incomplete
data on the moment of querying, since metrics collectors won't be able to deliver the data in time.
### Alerts state
@@ -977,7 +969,7 @@ The shortlist of configuration flags is the following:
-datasource.headers string
Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. For example, -datasource.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -datasource.url. Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'
-datasource.lookback duration
Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
-datasource.maxIdleConnections int
Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state. (default 100)
-datasource.oauth2.clientID string
@@ -1045,12 +1037,6 @@ The shortlist of configuration flags is the following:
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration
@@ -1231,7 +1217,7 @@ The shortlist of configuration flags is the following:
-remoteWrite.bearerTokenFile string
Optional path to bearer token file to use for -remoteWrite.url.
-remoteWrite.concurrency int
Defines number of writers for concurrent writing into remote write endpoint (default 1)
Defines number of writers for concurrent writing into remote querier (default 1)
-remoteWrite.disablePathAppend
Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.
-remoteWrite.flushInterval duration
@@ -1301,14 +1287,13 @@ The shortlist of configuration flags is the following:
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
Supports an array of values separated by comma or specified via multiple flags.
-rule.evalDelay time
Adjustment of the time parameter for rule evaluation requests to compensate intentional data delay from the datasource. Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect). (default 30s)
-rule.maxResolveDuration duration
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group.
-rule.resendDelay duration
Minimum amount of time to wait before resending an alert to notifier
Minimum amount of time to wait before resending an alert to notifier
-rule.templates array
Path or glob pattern to location with go template definitions for rules annotations templating. Flag can be specified multiple times.
Path or glob pattern to location with go template definitions
for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
@@ -1397,11 +1382,8 @@ For example:
```yaml
static_configs:
- targets:
# support using full url
- 'http://alertmanager:9093/test/api/v2/alerts'
- 'https://alertmanager:9093/api/v2/alerts'
# the following target with only host:port will be used as <scheme>://localhost:9093/<path_prefix>/api/v2/alerts
- localhost:9093
- localhost:9095
consul_sd_configs:
- server: localhost:8500

View File

@@ -19,14 +19,11 @@ import (
// Group contains list of Rules grouped into
// entity with one name and evaluation interval
type Group struct {
Type Type `yaml:"type,omitempty"`
File string
Name string `yaml:"name"`
Interval *promutils.Duration `yaml:"interval,omitempty"`
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutils.Duration `yaml:"eval_delay,omitempty"`
Type Type `yaml:"type,omitempty"`
File string
Name string `yaml:"name"`
Interval *promutils.Duration `yaml:"interval,omitempty"`
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
@@ -236,7 +233,7 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
files, err := readFromFS(pathPatterns)
if err != nil {
return nil, fmt.Errorf("failed to read from the config: %w", err)
return nil, fmt.Errorf("failed to read from the config: %s", err)
}
return parse(files, validateTplFn, validateExpressions)
}
@@ -245,11 +242,11 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
files, err := readFromFS(pathPatterns)
if err != nil {
return nil, fmt.Errorf("failed to read from the config: %w", err)
return nil, fmt.Errorf("failed to read from the config: %s", err)
}
groups, err := parse(files, validateTplFn, validateExpressions)
if err != nil {
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
}
if len(groups) < 1 {
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))

View File

@@ -106,7 +106,7 @@ func TestParseBad(t *testing.T) {
},
{
[]string{"http://unreachable-url"},
"failed to",
"failed to read",
},
}
for _, tc := range testCases {

View File

@@ -49,7 +49,7 @@ func (fs *FS) Read(files []string) (map[string][]byte, error) {
path, resp.StatusCode, http.StatusOK, data)
}
if err != nil {
return nil, fmt.Errorf("cannot read %q: %w", path, err)
return nil, fmt.Errorf("cannot read %q: %s", path, err)
}
result[path] = data
}

View File

@@ -15,7 +15,6 @@ groups:
interval: 2s
concurrency: 2
type: prometheus
eval_delay: 30s
rules:
- alert: Conns
expr: sum(vm_tcplistener_conns) by (instance) > 1

View File

@@ -43,9 +43,7 @@ var (
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side `+
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
"If set to 0, rule's evaluation interval will be used instead.")
@@ -85,10 +83,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
return nil, fmt.Errorf("datasource.url is empty")
}
if !*queryTimeAlignment {
logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
}
if *lookBack != 0 {
logger.Warnf("flag `-datasource.lookback` will be deprecated soon. Please use `-rule.evalDelay` command-line flag instead. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
logger.Warnf("flag `datasource.queryTimeAlignment` is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead")
}
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
@@ -118,7 +113,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
}
_, err = authCfg.GetAuthHeader()
if err != nil {
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %s", *addr, err)
}
return &VMStorage{

View File

@@ -142,30 +142,24 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Resu
return Result{}, nil, err
}
resp, err := s.do(ctx, req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between client and datasource might be closing
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = s.newQueryRequest(query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
req, _ = s.newQueryRequest(query, ts)
resp, err = s.do(ctx, req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
if err != nil {
return Result{}, req, err
}
defer func() {
_ = resp.Body.Close()
}()
// Process the received response.
parseFn := parsePrometheusResponse
if s.dataSourceType != datasourcePrometheus {
parseFn = parseGraphiteResponse
}
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
@@ -184,30 +178,22 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
}
req, err := s.newQueryRangeRequest(query, start, end)
if err != nil {
return res, err
return Result{}, err
}
resp, err := s.do(ctx, req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between client and datasource might be closing
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = s.newQueryRangeRequest(query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
req, _ = s.newQueryRangeRequest(query, start, end)
resp, err = s.do(ctx, req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
res, err = parsePrometheusResponse(req, resp)
_ = resp.Body.Close()
return res, err
if err != nil {
return res, err
}
defer func() {
_ = resp.Body.Close()
}()
return parsePrometheusResponse(req, resp)
}
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
@@ -233,7 +219,7 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
req, err := s.newRequest()
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %s", s.datasourceURL, err)
}
s.setPrometheusRangeReqParams(req, query, start, end)
return req, nil
@@ -242,7 +228,7 @@ func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*h
func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
req, err := s.newRequest()
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
return nil, fmt.Errorf("cannot create query request to datasource %q: %s", s.datasourceURL, err)
}
switch s.dataSourceType {
case "", datasourcePrometheus:

View File

@@ -112,14 +112,14 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
}
var parseFn func() ([]Metric, error)
switch r.Data.ResultType {
case rtVector:
var pi promInstant
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
}
parseFn = pi.metrics
case rtMatrix:

View File

@@ -47,8 +47,8 @@ all files with prefix rule_ in folder dir.
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
`)
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions `+
`for rules annotations templating. Flag can be specified multiple times.
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
@@ -230,9 +230,7 @@ func newManager(ctx context.Context) (*manager, error) {
if err != nil {
return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
}
if rw != nil {
manager.rw = rw
}
manager.rw = rw
rr, err := remoteread.Init()
if err != nil {

View File

@@ -3,6 +3,7 @@ package notifier
import (
"crypto/md5"
"fmt"
"gopkg.in/yaml.v2"
"net/url"
"os"
"path"
@@ -10,8 +11,6 @@ import (
"strings"
"time"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
@@ -143,23 +142,26 @@ func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (stri
if labels.Len() == 0 {
return "", nil, nil
}
scheme := labels.Get("__scheme__")
if len(scheme) == 0 {
scheme = "http"
schemeRelabeled := labels.Get("__scheme__")
if len(schemeRelabeled) == 0 {
schemeRelabeled = "http"
}
alertsPath := labels.Get("__alerts_path__")
if !strings.HasPrefix(alertsPath, "/") {
alertsPath = "/" + alertsPath
}
address := labels.Get("__address__")
if len(address) == 0 {
addressRelabeled := labels.Get("__address__")
if len(addressRelabeled) == 0 {
return "", nil, nil
}
address = addMissingPort(scheme, address)
u := fmt.Sprintf("%s://%s%s", scheme, address, alertsPath)
if strings.Contains(addressRelabeled, "/") {
return "", nil, nil
}
addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
alertsPathRelabeled := labels.Get("__alerts_path__")
if !strings.HasPrefix(alertsPathRelabeled, "/") {
alertsPathRelabeled = "/" + alertsPathRelabeled
}
u := fmt.Sprintf("%s://%s%s", schemeRelabeled, addressRelabeled, alertsPathRelabeled)
if _, err := url.Parse(u); err != nil {
return "", nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q, metrics_path=%q (%q): %w",
u, cfg.Scheme, scheme, target, address, alertsPath, err)
u, cfg.Scheme, schemeRelabeled, target, addressRelabeled, alertsPathRelabeled, err)
}
return u, labels, nil
}
@@ -179,24 +181,9 @@ func addMissingPort(scheme, target string) string {
func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
m := promutils.NewLabels(3 + metaLabels.Len())
address := target
scheme := cfg.Scheme
alertsPath := path.Join("/", cfg.PathPrefix, alertManagerPath)
// try to extract optional scheme and alertsPath from __address__.
if strings.HasPrefix(address, "http://") {
scheme = "http"
address = address[len("http://"):]
} else if strings.HasPrefix(address, "https://") {
scheme = "https"
address = address[len("https://"):]
}
if n := strings.IndexByte(address, '/'); n >= 0 {
alertsPath = address[n:]
address = address[:n]
}
m.Add("__address__", address)
m.Add("__scheme__", scheme)
m.Add("__alerts_path__", alertsPath)
m.Add("__address__", target)
m.Add("__scheme__", cfg.Scheme)
m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
m.AddFrom(metaLabels)
return m
}

View File

@@ -87,7 +87,7 @@ func (cw *configWatcher) reload(path string) error {
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
return fmt.Errorf("failed to init notifier for %q: %s", typeK, err)
}
cw.setTargets(typeK, targets)
@@ -107,7 +107,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
}
updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
logger.Errorf("failed to init notifier for %q: %s", typeK, err)
}
cw.setTargets(typeK, updateTargets)
}
@@ -118,7 +118,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
metaLabels, err := labelsFn()
if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
return nil, []error{fmt.Errorf("failed to get labels: %s", err)}
}
var targets []Target
var errors []error
@@ -167,11 +167,11 @@ func (cw *configWatcher) start() error {
for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
return fmt.Errorf("failed to parse labels for target %q: %s", target, err)
}
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
return fmt.Errorf("failed to init alertmanager for addr %q: %s", address, err)
}
targets = append(targets, Target{
Notifier: notifier,
@@ -189,14 +189,14 @@ func (cw *configWatcher) start() error {
sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %s", err)
}
labels = append(labels, targetLabels...)
}
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err)
return fmt.Errorf("failed to start consulSD discovery: %s", err)
}
}
@@ -207,14 +207,14 @@ func (cw *configWatcher) start() error {
sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %s", err)
}
labels = append(labels, targetLabels...)
}
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
return fmt.Errorf("failed to start DNSSD discovery: %s", err)
}
}
return nil

View File

@@ -318,47 +318,3 @@ func TestMergeHTTPClientConfigs(t *testing.T) {
t.Fatalf("expected BasicAuth tp be present")
}
}
// TestParseLabels verifies the URL that parseLabels builds for a static
// target (no meta labels) under different configs:
//   - a malformed target must yield an error and an empty address;
//   - a bare host:port target is expected to be completed with the "http"
//     scheme and the config's path prefix;
//   - a target with an explicit scheme and path is expected to be returned
//     as-is, regardless of Scheme/PathPrefix set in the config.
func TestParseLabels(t *testing.T) {
	type testCase struct {
		name            string
		target          string
		cfg             *Config
		expectedAddress string
		expectedErr     bool
	}
	cases := []testCase{
		{
			name:            "invalid address",
			target:          "invalid:*//url",
			cfg:             &Config{},
			expectedAddress: "",
			expectedErr:     true,
		},
		{
			name:            "use some default params",
			target:          "alertmanager:9093",
			cfg:             &Config{PathPrefix: "test"},
			expectedAddress: "http://alertmanager:9093/test/api/v2/alerts",
			expectedErr:     false,
		},
		{
			name:            "use target address",
			target:          "https://alertmanager:9093/api/v1/alerts",
			cfg:             &Config{Scheme: "http", PathPrefix: "test"},
			expectedAddress: "https://alertmanager:9093/api/v1/alerts",
			expectedErr:     false,
		},
	}
	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			gotAddress, _, err := parseLabels(c.target, nil, c.cfg)
			// Only the presence of an error is compared, not its text.
			gotErr := err != nil
			if gotErr != c.expectedErr {
				t.Fatalf("unexpected error; got %t; want %t", gotErr, c.expectedErr)
			}
			if gotAddress != c.expectedAddress {
				t.Fatalf("unexpected address; got %q; want %q", gotAddress, c.expectedAddress)
			}
		})
	}
}

View File

@@ -90,7 +90,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
externalLabels = extLabels
eu, err := url.Parse(externalURL)
if err != nil {
return nil, fmt.Errorf("failed to parse external URL: %w", err)
return nil, fmt.Errorf("failed to parse external URL: %s", err)
}
templates.UpdateWithFuncs(templates.FuncsWithExternalURL(eu))
@@ -116,7 +116,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(gen)
if err != nil {
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
return nil, fmt.Errorf("failed to create notifier from flag values: %s", err)
}
staticNotifiersFn = func() []Notifier {
return notifiers
@@ -126,7 +126,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
cw, err = newWatcher(*configPath, gen)
if err != nil {
return nil, fmt.Errorf("failed to init config watcher: %w", err)
return nil, fmt.Errorf("failed to init config watcher: %s", err)
}
return cw.notifiers, nil
}

View File

@@ -5,7 +5,6 @@ static_configs:
- targets:
- localhost:9093
- localhost:9095
- https://localhost:9093/test/api/v2/alerts
basic_auth:
username: foo
password: bar

View File

@@ -3,7 +3,6 @@ package remotewrite
import (
"bytes"
"context"
"errors"
"flag"
"fmt"
"io"
@@ -118,19 +117,12 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
// Push adds timeseries into queue for writing into remote storage.
// Push returns an error if client is stopped or if queue is full.
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
rwTotal.Inc()
select {
case <-c.doneCh:
rwErrors.Inc()
droppedRows.Add(len(s.Samples))
droppedBytes.Add(s.Size())
return fmt.Errorf("client is closed")
case c.input <- s:
return nil
default:
rwErrors.Inc()
droppedRows.Add(len(s.Samples))
droppedBytes.Add(s.Size())
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
c.maxQueueSize)
@@ -189,14 +181,11 @@ func (c *Client) run(ctx context.Context) {
}
var (
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
@@ -233,11 +222,6 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
L:
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
if errors.Is(err, io.EOF) {
// Something in the middle between client and destination might be closing
// the connection. So we do a one more attempt in hope request will succeed.
err = c.send(ctx, b)
}
if err == nil {
sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(b))
@@ -275,7 +259,6 @@ L:
}
rwErrors.Inc()
droppedRows.Add(len(wr.Timeseries))
droppedBytes.Add(len(b))
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
@@ -299,9 +282,7 @@ func (c *Client) send(ctx context.Context, data []byte) error {
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return &nonRetriableError{
err: err,
}
return &nonRetriableError{err: err}
}
}
if !*disablePathAppend {
@@ -320,14 +301,13 @@ func (c *Client) send(ctx context.Context, data []byte) error {
// Prometheus remote Write compatible receivers MUST
switch resp.StatusCode / 100 {
case 2:
// respond with HTTP 2xx status code when write is successful.
// respond with a HTTP 2xx status code when the write is successful.
return nil
case 4:
if resp.StatusCode != http.StatusTooManyRequests {
// MUST NOT retry write requests on HTTP 4xx responses other than 429
return &nonRetriableError{
err: fmt.Errorf("unexpected response code %d for %s. Response body %q", resp.StatusCode, req.URL.Redacted(), body),
}
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)}
}
fallthrough
default:

View File

@@ -30,7 +30,7 @@ var (
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines max number of timeseries to be flushed at once")
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote write endpoint")
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")

View File

@@ -36,11 +36,11 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
}
tFrom, err := time.Parse(time.RFC3339, *replayFrom)
if err != nil {
return fmt.Errorf("failed to parse %q: %w", *replayFrom, err)
return fmt.Errorf("failed to parse %q: %s", *replayFrom, err)
}
tTo, err := time.Parse(time.RFC3339, *replayTo)
if err != nil {
return fmt.Errorf("failed to parse %q: %w", *replayTo, err)
return fmt.Errorf("failed to parse %q: %s", *replayTo, err)
}
if !tTo.After(tFrom) {
return fmt.Errorf("replay.timeTo must be bigger than replay.timeFrom")

View File

@@ -91,7 +91,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
entries: make([]StateEntry, entrySize),
}
labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
func() float64 {
ar.alertsMu.RLock()
@@ -269,7 +269,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
for k, v := range extraLabels {
ls.processed[k] = v
@@ -295,33 +295,24 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
}
// execRange executes alerting rule on the given time range similarly to exec.
// When making consecutive calls make sure to respect time linearity for start and end params,
// as this function modifies AlertingRule alerts state.
// It is not thread safe.
// It returns ALERT and ALERT_FOR_STATE time series as a result.
// It doesn't update internal states of the Rule and meant to be used just
// to get time series for backfilling.
// It returns ALERT and ALERT_FOR_STATE time series as result.
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
if err != nil {
return nil, err
}
var result []prompbmarshal.TimeSeries
holdAlertState := make(map[uint64]*notifier.Alert)
qFn := func(query string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
}
for _, s := range res.Data {
ls, err := ar.toLabels(s, qFn)
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
h := hash(ls.processed)
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
if err != nil {
return nil, fmt.Errorf("failed to create alert: %w", err)
return nil, fmt.Errorf("failed to create alert: %s", err)
}
// if alert is instant, For: 0
if ar.For == 0 {
if ar.For == 0 { // if alert is instant
a.State = notifier.StateFiring
for i := range s.Values {
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
@@ -333,32 +324,18 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
prevT := time.Time{}
for i := range s.Values {
at := time.Unix(s.Timestamps[i], 0)
// try to restore alert's state on the first iteration
if at.Equal(start) {
if _, ok := ar.alerts[h]; ok {
a = ar.alerts[h]
prevT = at
}
}
if at.Sub(prevT) > ar.EvalInterval {
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
a.ActiveAt = at
a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
} else if at.Sub(a.ActiveAt) >= ar.For {
a.State = notifier.StateFiring
a.Start = at
}
prevT = at
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
// save alert's state on last iteration, so it can be used on the next execRange call
if at.Equal(end) {
holdAlertState[h] = a
}
}
}
ar.alerts = holdAlertState
return result, nil
}
@@ -411,7 +388,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
for _, m := range res.Data {
ls, err := ar.toLabels(m, qFn)
if err != nil {
curState.Err = fmt.Errorf("failed to expand labels: %w", err)
curState.Err = fmt.Errorf("failed to expand labels: %s", err)
return nil, curState.Err
}
h := hash(ls.processed)
@@ -536,7 +513,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
if ls == nil {
ls, err = ar.toLabels(m, qFn)
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
return nil, fmt.Errorf("failed to expand labels: %s", err)
}
}
a := &notifier.Alert{
@@ -614,41 +591,44 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
return nil
}
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
if !*disableAlertGroupLabel {
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
}
var labelsFilter string
for k, v := range ar.Labels {
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
}
expr := fmt.Sprintf("last_over_time(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
}
if len(res.Data) < 1 {
ar.logDebugf(ts, nil, "no response was received from restore query")
return nil
}
for _, series := range res.Data {
series.DelLabel("__name__")
labelSet := make(map[string]string, len(series.Labels))
for _, v := range series.Labels {
labelSet[v.Name] = v.Value
}
id := hash(labelSet)
a, ok := ar.alerts[id]
if !ok {
continue
}
for _, a := range ar.alerts {
if a.Restored || a.State != notifier.StatePending {
continue
}
a.ActiveAt = time.Unix(int64(series.Values[0]), 0)
var labelsFilter []string
for k, v := range a.Labels {
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
}
sort.Strings(labelsFilter)
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return err
}
qMetrics := res.Data
if len(qMetrics) < 1 {
ar.logDebugf(ts, nil, "no response was received from restore query")
continue
}
// only one series expected in response
m := qMetrics[0]
// __name__ supposed to be alertForStateMetricName
m.DelLabel("__name__")
// we assume that restore query contains all label matchers,
// so all received labels will match anyway if their number is equal.
if len(m.Labels) != len(a.Labels) {
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
continue
}
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
a.Restored = true
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
}

View File

@@ -346,18 +346,15 @@ func TestAlertingRule_Exec(t *testing.T) {
}
func TestAlertingRule_ExecRange(t *testing.T) {
fakeGroup := Group{Name: "TestRule_ExecRange"}
testCases := []struct {
rule *AlertingRule
data []datasource.Metric
expAlerts []*notifier.Alert
expHoldAlertStateAlerts map[uint64]*notifier.Alert
rule *AlertingRule
data []datasource.Metric
expAlerts []*notifier.Alert
}{
{
newTestAlertingRule("empty", 0),
[]datasource.Metric{},
nil,
nil,
},
{
newTestAlertingRule("empty labels", 0),
@@ -367,7 +364,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
[]*notifier.Alert{
{State: notifier.StateFiring},
},
nil,
},
{
newTestAlertingRule("single-firing", 0),
@@ -380,7 +376,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
State: notifier.StateFiring,
},
},
nil,
},
{
newTestAlertingRule("single-firing-on-range", 0),
@@ -392,7 +387,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StateFiring},
{State: notifier.StateFiring},
},
nil,
},
{
newTestAlertingRule("for-pending", time.Second),
@@ -404,16 +398,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
},
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
GroupID: fakeGroup.ID(),
Name: "for-pending",
Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
For: time.Second,
}},
},
{
newTestAlertingRule("for-firing", 3*time.Second),
@@ -425,38 +409,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
},
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
GroupID: fakeGroup.ID(),
Name: "for-firing",
Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{},
State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0),
Value: 1,
For: 3 * time.Second,
}},
},
{
newTestAlertingRule("for-hold-pending", time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
},
[]*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
},
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
GroupID: fakeGroup.ID(),
Name: "for-hold-pending",
Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
For: time.Second,
}},
},
{
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
@@ -470,10 +422,9 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
},
nil,
},
{
newTestAlertingRule("multi-series", 3*time.Second),
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
{
@@ -485,6 +436,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
//
{
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
Labels: map[string]string{
@@ -498,29 +450,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
},
},
},
map[uint64]*notifier.Alert{
hash(map[string]string{"alertname": "multi-series"}): {
GroupID: fakeGroup.ID(),
Name: "multi-series",
Labels: map[string]string{"alertname": "multi-series"},
Annotations: map[string]string{},
State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0),
Value: 1,
For: 3 * time.Second,
},
hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
GroupID: fakeGroup.ID(),
Name: "multi-series",
Labels: map[string]string{"alertname": "multi-series", "foo": "bar"},
Annotations: map[string]string{},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
For: 3 * time.Second,
},
},
},
{
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
@@ -548,16 +477,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
"source": "vm",
}},
},
nil,
},
}
fakeGroup := Group{Name: "TestRule_ExecRange"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
fq.Add(tc.data...)
gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
gotTS, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
@@ -583,11 +512,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
}
}
if tc.expHoldAlertStateAlerts != nil {
if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
}
}
})
}
}
@@ -640,9 +564,6 @@ func TestGroup_Restore(t *testing.T) {
if got.ActiveAt != exp.ActiveAt {
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
}
if got.Name != exp.Name {
t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
}
}
}
@@ -658,7 +579,6 @@ func TestGroup_Restore(t *testing.T) {
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
Name: "foo",
ActiveAt: defaultTS,
},
})
@@ -672,7 +592,6 @@ func TestGroup_Restore(t *testing.T) {
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
Name: "foo",
ActiveAt: ts,
},
})
@@ -680,7 +599,7 @@ func TestGroup_Restore(t *testing.T) {
// two rules, two active alerts, one with state restored
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
stateMetric("bar", ts))
stateMetric("foo", ts))
fn(
[]config.Rule{
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
@@ -688,11 +607,9 @@ func TestGroup_Restore(t *testing.T) {
},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
Name: "foo",
ActiveAt: defaultTS,
},
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
Name: "bar",
ActiveAt: ts,
},
})
@@ -710,11 +627,9 @@ func TestGroup_Restore(t *testing.T) {
},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
Name: "foo",
ActiveAt: ts,
},
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
Name: "bar",
ActiveAt: ts,
},
})
@@ -727,7 +642,6 @@ func TestGroup_Restore(t *testing.T) {
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
Name: "foo",
ActiveAt: defaultTS,
},
})
@@ -740,7 +654,6 @@ func TestGroup_Restore(t *testing.T) {
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
Name: "foo",
ActiveAt: ts,
},
})
@@ -753,7 +666,6 @@ func TestGroup_Restore(t *testing.T) {
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
Name: "foo",
ActiveAt: defaultTS,
},
})

View File

@@ -31,9 +31,7 @@ var (
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the `time` parameter for rule evaluation requests to compensate intentional data delay from the datasource."+
"Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect).")
"which by default is 4 times evaluationInterval of the parent ")
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
@@ -41,16 +39,13 @@ var (
// Group is an entity for grouping rules
type Group struct {
mu sync.RWMutex
Name string
File string
Rules []Rule
Type config.Type
Interval time.Duration
EvalOffset *time.Duration
// EvalDelay will adjust timestamp for rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *time.Duration
mu sync.RWMutex
Name string
File string
Rules []Rule
Type config.Type
Interval time.Duration
EvalOffset *time.Duration
Limit int
Concurrency int
Checksum string
@@ -144,9 +139,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if cfg.EvalOffset != nil {
g.EvalOffset = &cfg.EvalOffset.D
}
if cfg.EvalDelay != nil {
g.EvalDelay = &cfg.EvalDelay.D
}
for _, h := range cfg.Headers {
g.Headers[h.Key] = h.Value
}
@@ -339,7 +331,7 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
g.infof("started")
@@ -528,7 +520,7 @@ func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
if len(g.Rules) < 1 {
return nil
@@ -589,14 +581,10 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
// to 10:30, to the previous evaluationInterval.
return ts.Add(-g.Interval)
}
// when `eval_offset` is used, ts shouldn't be affected by `eval_alignment` and `eval_delay`
// since it should always be aligned.
// EvalOffset shouldn't interfere with evalAlignment,
// so we return it immediately
return ts
}
timestamp = timestamp.Add(-g.getEvalDelay())
// always apply the alignment as a last step
if g.evalAlignment == nil || *g.evalAlignment {
// align query time with interval to get similar result with grafana when plotting time series.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
@@ -606,13 +594,6 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
return timestamp
}
// getEvalDelay returns the evaluation delay to apply for this group.
// The group's own EvalDelay takes precedence when it is set; otherwise
// the value of the package-level evalDelay flag is returned.
func (g *Group) getEvalDelay() time.Duration {
	delay := evalDelay
	if g.EvalDelay != nil {
		delay = g.EvalDelay
	}
	return *delay
}
// executor contains group's notify and rw configs
type executor struct {
Notifiers func() []notifier.Notifier
@@ -621,11 +602,11 @@ type executor struct {
Rw remotewrite.RWClient
previouslySentSeriesToRWMu sync.Mutex
// previouslySentSeriesToRW stores series sent to RW on previous iteration
// PreviouslySentSeriesToRW stores series sent to RW on previous iteration
// map[ruleID]map[ruleLabels][]prompb.Label
// where `ruleID` is ID of the Rule within a Group
// and `ruleLabels` is []prompb.Label marshalled to a string
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
PreviouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
}
// execConcurrently executes rules concurrently if concurrency>1
@@ -663,6 +644,9 @@ var (
execTotal = metrics.NewCounter(`vmalert_execution_total`)
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
)
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
@@ -683,7 +667,9 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
var lastErr error
for _, ts := range tss {
remoteWriteTotal.Inc()
if err := e.Rw.Push(ts); err != nil {
remoteWriteErrors.Inc()
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
}
}
@@ -737,7 +723,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
var staleS []prompbmarshal.TimeSeries
// check whether there are series which disappeared and need to be marked as stale
e.previouslySentSeriesToRWMu.Lock()
for key, labels := range e.previouslySentSeriesToRW[rID] {
for key, labels := range e.PreviouslySentSeriesToRW[rID] {
if _, ok := ruleLabels[key]; ok {
continue
}
@@ -746,7 +732,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
staleS = append(staleS, ss)
}
// set previous series to current
e.previouslySentSeriesToRW[rID] = ruleLabels
e.PreviouslySentSeriesToRW[rID] = ruleLabels
e.previouslySentSeriesToRWMu.Unlock()
return staleS
@@ -764,14 +750,14 @@ func (e *executor) purgeStaleSeries(activeRules []Rule) {
for _, rule := range activeRules {
id := rule.ID()
prev, ok := e.previouslySentSeriesToRW[id]
prev, ok := e.PreviouslySentSeriesToRW[id]
if ok {
// keep previous series for staleness detection
newPreviouslySentSeriesToRW[id] = prev
}
}
e.previouslySentSeriesToRW = nil
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
e.PreviouslySentSeriesToRW = nil
e.PreviouslySentSeriesToRW = newPreviouslySentSeriesToRW
e.previouslySentSeriesToRWMu.Unlock()
}

View File

@@ -321,7 +321,7 @@ func TestResolveDuration(t *testing.T) {
func TestGetStaleSeries(t *testing.T) {
ts := time.Now()
e := &executor{
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
t.Helper()
@@ -414,7 +414,7 @@ func TestPurgeStaleSeries(t *testing.T) {
f := func(curRules, newRules, expStaleRules []Rule) {
t.Helper()
e := &executor{
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
// seed executor with series for
// current rules
@@ -424,13 +424,13 @@ func TestPurgeStaleSeries(t *testing.T) {
e.purgeStaleSeries(newRules)
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
if len(e.PreviouslySentSeriesToRW) != len(expStaleRules) {
t.Fatalf("expected to get %d stale series, got %d",
len(expStaleRules), len(e.previouslySentSeriesToRW))
len(expStaleRules), len(e.PreviouslySentSeriesToRW))
}
for _, exp := range expStaleRules {
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
if _, ok := e.PreviouslySentSeriesToRW[exp.ID()]; !ok {
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
}
}
@@ -515,7 +515,7 @@ func TestFaultyRW(t *testing.T) {
e := &executor{
Rw: &remotewrite.Client{},
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
err := e.exec(context.Background(), r, time.Now(), 0, 10)
@@ -628,7 +628,6 @@ func TestGroupStartDelay(t *testing.T) {
func TestGetPrometheusReqTimestamp(t *testing.T) {
offset := 30 * time.Minute
evalDelay := 1 * time.Minute
disableAlign := false
testCases := []struct {
name string
@@ -636,7 +635,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
originTS, expTS string
}{
{
"with query align + default evalDelay",
"with query align",
&Group{
Interval: time.Hour,
},
@@ -644,16 +643,16 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
"2023-08-28T11:00:00+00:00",
},
{
"without query align + default evalDelay",
"without query align",
&Group{
Interval: time.Hour,
evalAlignment: &disableAlign,
},
"2023-08-28T11:11:00+00:00",
"2023-08-28T11:10:30+00:00",
"2023-08-28T11:11:00+00:00",
},
{
"with eval_offset, find previous offset point + default evalDelay",
"with eval_offset, find previous offset point",
&Group{
EvalOffset: &offset,
Interval: time.Hour,
@@ -662,7 +661,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
"2023-08-28T10:30:00+00:00",
},
{
"with eval_offset + default evalDelay",
"with eval_offset",
&Group{
EvalOffset: &offset,
Interval: time.Hour,
@@ -670,44 +669,14 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
"2023-08-28T11:41:00+00:00",
"2023-08-28T11:30:00+00:00",
},
{
"1h interval with eval_delay",
&Group{
EvalDelay: &evalDelay,
Interval: time.Hour,
},
"2023-08-28T11:41:00+00:00",
"2023-08-28T11:00:00+00:00",
},
{
"1m interval with eval_delay",
&Group{
EvalDelay: &evalDelay,
Interval: time.Minute,
},
"2023-08-28T11:41:13+00:00",
"2023-08-28T11:40:00+00:00",
},
{
"disable alignment with eval_delay",
&Group{
EvalDelay: &evalDelay,
Interval: time.Hour,
evalAlignment: &disableAlign,
},
"2023-08-28T11:41:00+00:00",
"2023-08-28T11:40:00+00:00",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
originT, _ := time.Parse(time.RFC3339, tc.originTS)
expT, _ := time.Parse(time.RFC3339, tc.expTS)
gotTS := tc.g.adjustReqTimestamp(originT)
if !gotTS.Equal(expT) {
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
}
})
originT, _ := time.Parse(time.RFC3339, tc.originTS)
expT, _ := time.Parse(time.RFC3339, tc.expTS)
gotTS := tc.g.adjustReqTimestamp(originT)
if !gotTS.Equal(expT) {
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
}
}
}

View File

@@ -78,7 +78,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
entries: make([]StateEntry, entrySize),
}
labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
func() float64 {
e := rr.state.getLast()

View File

@@ -166,7 +166,7 @@ func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRul
var n int
for _, ts := range tss {
if err := rw.Push(ts); err != nil {
return n, fmt.Errorf("remote write failure: %w", err)
return n, fmt.Errorf("remote write failure: %s", err)
}
n += len(ts.Samples)
}

View File

@@ -147,11 +147,11 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
}
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
if err != nil {
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil {
@@ -163,11 +163,11 @@ func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
return nil, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
}
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
return nil, fmt.Errorf("failed to read %q param: %s", paramAlertID, err)
}
a, err := rh.m.alertAPI(groupID, alertID)
if err != nil {

View File

@@ -94,10 +94,6 @@ type apiGroup struct {
NotifierHeaders []string `json:"notifier_headers,omitempty"`
// Labels is a set of label value pairs, that will be added to every rule.
Labels map[string]string `json:"labels,omitempty"`
// EvalOffset Group will be evaluated at the exact time offset on the range of [0...evaluationInterval]
EvalOffset float64 `json:"eval_offset,omitempty"`
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
EvalDelay float64 `json:"eval_delay,omitempty"`
}
// groupAlerts represents a group of alerts for WEB view
@@ -313,12 +309,6 @@ func groupToAPI(g *rule.Group) apiGroup {
Labels: g.Labels,
}
if g.EvalOffset != nil {
ag.EvalOffset = g.EvalOffset.Seconds()
}
if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds()
}
ag.Rules = make([]apiRule, 0)
for _, r := range g.Rules {
ag.Rules = append(ag.Rules, ruleToAPI(r))

View File

@@ -32,38 +32,6 @@ Pass `-help` to `vmauth` in order to see all the supported command-line flags wi
Feel free to [contact us](mailto:info@victoriametrics.com) if you need a customized auth proxy for VictoriaMetrics with support for LDAP, SSO, RBAC, SAML,
accounting and rate limiting, such as [vmgateway](https://docs.victoriametrics.com/vmgateway.html).
## Dropping request path prefix
By default `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
Sometimes it is needed to drop path prefix before routing the request to the backend. This can be done by specifying the number of `/`-delimited
prefix parts to drop from the request path via `drop_src_path_prefix_parts` option at `url_map` level or at `user` level.
For example, if you need to serve requests to [vmalert](https://docs.victoriametrics.com/vmalert.html) at `/vmalert/` path prefix,
while serving requests to [vmagent](https://docs.victoriametrics.com/vmagent.html) at `/vmagent/` path prefix for a particular user,
then the following [-auth.config](#auth-config) can be used:
```yml
users:
- username: foo
url_map:
# proxy all the requests, which start with `/vmagent/`, to vmagent backend
- src_paths:
- "/vmagent/.+"
# drop /vmagent/ path prefix from the original request before proxying it to url_prefix.
drop_src_path_prefix_parts: 1
url_prefix: "http://vmagent-backend:8429/"
# proxy all the requests, which start with `/vmalert`, to vmalert backend
- src_paths:
- "/vmalert/.+"
# drop /vmalert/ path prefix from the original request before proxying it to url_prefix.
drop_src_path_prefix_parts: 1
url_prefix: "http://vmalert-backend:8880/"
```
## Load balancing
Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
@@ -133,31 +101,6 @@ The following [metrics](#monitoring) related to concurrency limits are exposed b
- `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
because the concurrency limit has been reached for unauthorized users (if the `unauthorized_user` section is used).
## Backend TLS setup
By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:
- `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
via `tls_insecure_skip_verify` option. For example:
```yml
- username: "foo"
url_prefix: "https://localhost"
tls_insecure_skip_verify: true
```
- `-backend.tlsCAFile` allows specifying the path to TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
The `-backend.tlsCAFile` may point either to local file or to `http` / `https` url.
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
via `tls_ca_file` option. For example:
```yml
- username: "foo"
url_prefix: "https://localhost"
tls_ca_file: "/path/to/tls/root/ca"
```
## IP filters
@@ -238,15 +181,6 @@ users:
password: "***"
url_prefix: "http://localhost:8428?extra_label=team=dev"
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to https://localhost.
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
# TLS verification is skipped for https://localhost.
- username: "local-single-node-with-tls"
password: "***"
url_prefix: "https://localhost"
tls_insecure_skip_verify: true
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
@@ -288,8 +222,6 @@ users:
# For example, requests to http://vmauth:8427/non/existing/path are proxied:
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
#
# Regular expressions are allowed in `src_paths` entries.
- username: "foobar"
url_map:
- src_paths:
@@ -316,8 +248,6 @@ users:
# Requests are routed in round-robin fashion between `url_prefix` backends.
# The deny_partial_response query arg is added to all the routed requests.
# The requests are re-tried if url_prefix backends send 500 or 503 response status codes.
# Note that the unauthorized_user section takes precedence when processing a route without credentials,
# even if such a route also exists in the users section (see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236).
unauthorized_user:
url_prefix:
- http://vmselect-az1/?deny_partial_response=1
@@ -478,12 +408,6 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration

View File

@@ -5,7 +5,6 @@ import (
"encoding/base64"
"flag"
"fmt"
"net/http"
"net/url"
"os"
"regexp"
@@ -15,14 +14,13 @@ import (
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
)
var (
@@ -40,25 +38,20 @@ type AuthConfig struct {
// UserInfo is user information read from authConfigPath
type UserInfo struct {
Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
URLMaps []URLMap `yaml:"url_map,omitempty"`
HeadersConf HeadersConf `yaml:",inline"`
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
TLSInsecureSkipVerify *bool `yaml:"tls_insecure_skip_verify,omitempty"`
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
URLMaps []URLMap `yaml:"url_map,omitempty"`
HeadersConf HeadersConf `yaml:",inline"`
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
concurrencyLimitCh chan struct{}
concurrencyLimitReached *metrics.Counter
httpTransport *http.Transport
requests *metrics.Counter
requestsDuration *metrics.Summary
}
@@ -120,11 +113,10 @@ func (h *Header) MarshalYAML() (interface{}, error) {
// URLMap is a mapping from source paths to target urls.
type URLMap struct {
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
HeadersConf HeadersConf `yaml:",inline"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
HeadersConf HeadersConf `yaml:",inline"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
}
// SrcPath represents an src path
@@ -428,18 +420,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
}
ui := ac.UnauthorizedUser
if ui != nil {
if ui.Username != "" {
return nil, fmt.Errorf("field username can't be specified for unauthorized_user section")
}
if ui.Password != "" {
return nil, fmt.Errorf("field password can't be specified for unauthorized_user section")
}
if ui.BearerToken != "" {
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
}
if ui.Name != "" {
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
}
ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`)
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`)
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@@ -450,11 +430,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`, func() float64 {
return float64(len(ui.concurrencyLimitCh))
})
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
if err != nil {
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
}
ui.httpTransport = tr
}
return &ac, nil
}
@@ -525,12 +500,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
return float64(len(ui.concurrencyLimitCh))
})
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
if err != nil {
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
}
ui.httpTransport = tr
byAuthToken[at1] = ui
byAuthToken[at2] = ui
}

View File

@@ -221,26 +221,22 @@ func TestParseAuthConfigSuccess(t *testing.T) {
}
// Single user
insecureSkipVerifyTrue := true
f(`
users:
- username: foo
password: bar
url_prefix: http://aaa:343/bbb
max_concurrent_requests: 5
tls_insecure_skip_verify: true
`, map[string]*UserInfo{
getAuthToken("", "foo", "bar"): {
Username: "foo",
Password: "bar",
URLPrefix: mustParseURL("http://aaa:343/bbb"),
MaxConcurrentRequests: 5,
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
},
})
// Multiple url_prefix entries
insecureSkipVerifyFalse := false
f(`
users:
- username: foo
@@ -248,9 +244,6 @@ users:
url_prefix:
- http://node1:343/bbb
- http://node2:343/bbb
tls_insecure_skip_verify: false
retry_status_codes: [500, 501]
drop_src_path_prefix_parts: 1
`, map[string]*UserInfo{
getAuthToken("", "foo", "bar"): {
Username: "foo",
@@ -259,9 +252,6 @@ users:
"http://node1:343/bbb",
"http://node2:343/bbb",
}),
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
RetryStatusCodes: []int{500, 501},
DropSrcPathPrefixParts: 1,
},
})
@@ -458,47 +448,6 @@ users:
}
// TestParseAuthConfigPassesTLSVerificationConfig checks that the
// `tls_insecure_skip_verify` option from the auth config ends up both in the
// parsed UserInfo field and in the TLS client config of the HTTP transport
// attached to that user.
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
// Config with one regular user (skip verify enabled) and an
// unauthorized_user section (skip verify explicitly disabled).
c := `
users:
- username: foo
password: bar
url_prefix: https://aaa/bbb
max_concurrent_requests: 5
tls_insecure_skip_verify: true
unauthorized_user:
url_prefix: http://aaa:343/bbb
max_concurrent_requests: 5
tls_insecure_skip_verify: false
`
ac, err := parseAuthConfig([]byte(c))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
m, err := parseAuthConfigUsers(ac)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
// The regular user is looked up by the auth token built from its credentials.
ui := m[getAuthToken("", "foo", "bar")]
// Both the parsed option and the resulting transport must have verification skipped.
if !isSetBool(ui.TLSInsecureSkipVerify, true) || !ui.httpTransport.TLSClientConfig.InsecureSkipVerify {
t.Fatalf("unexpected TLSInsecureSkipVerify value for user foo")
}
// For unauthorized_user the option must be set to false and the transport must keep verification on.
if !isSetBool(ac.UnauthorizedUser.TLSInsecureSkipVerify, false) || ac.UnauthorizedUser.httpTransport.TLSClientConfig.InsecureSkipVerify {
t.Fatalf("unexpected TLSInsecureSkipVerify value for unauthorized_user")
}
}
// isSetBool reports whether boolP is non-nil and points to expectedValue.
//
// A nil pointer is treated as "not set" and always yields false, regardless
// of expectedValue.
func isSetBool(boolP *bool, expectedValue bool) bool {
	return boolP != nil && *boolP == expectedValue
}
func getSrcPaths(paths []string) []*SrcPath {
var sps []*SrcPath
for _, path := range paths {

View File

@@ -42,15 +42,6 @@ users:
password: "***"
url_prefix: "http://localhost:8428?extra_label=team=dev"
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to https://localhost
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
# TLS verification is ignored for https://localhost.
- username: "local-single-node-with-tls"
password: "***"
url_prefix: "https://localhost"
tls_insecure_skip_verify: true
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
@@ -91,8 +82,6 @@ users:
# For example, requests to http://vmauth:8427/non/existing/path are proxied:
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
#
# Regular expressions are allowed in `src_paths` entries.
- username: "foobar"
url_map:
- src_paths:

View File

@@ -20,8 +20,6 @@ users:
# For example, requests to http://vmauth:8427/non/existing/path are proxied:
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
#
# Regular expressions are allowed in `src_paths` entries.
- username: "foobar"
url_map:
- src_paths:

View File

@@ -2,8 +2,6 @@ package main
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"flag"
"fmt"
@@ -17,19 +15,16 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
@@ -51,10 +46,6 @@ var (
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
"Bigger values may require more memory")
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
)
func main() {
@@ -164,19 +155,11 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
u := normalizeURL(r.URL)
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
isDefault := false
if up == nil {
missingRouteRequests.Inc()
if ui.DefaultURL == nil {
// Authorization should be requested for http requests without credentials
// to a route that is not in the configuration for unauthorized user.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236
if ui.BearerToken == "" && ui.Username == "" && len(*authUsers.Load()) > 0 {
w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
http.Error(w, "missing `Authorization` request header", http.StatusUnauthorized)
return
}
missingRouteRequests.Inc()
httpserver.Errorf(w, r, "missing route for %q", u.String())
return
}
@@ -198,9 +181,9 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
query.Set("request_path", u.Path)
targetURL.RawQuery = query.Encode()
} else { // Update path for regular routes.
targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts)
targetURL = mergeURLs(targetURL, u)
}
ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport)
ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes)
bu.put()
if ok {
return
@@ -214,12 +197,12 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
httpserver.Errorf(w, r, "%s", err)
}
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, transport *http.Transport) bool {
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int) bool {
// This code has been copied from net/http/httputil/reverseproxy.go
req := sanitizeRequestHeaders(r)
req.URL = targetURL
req.Host = targetURL.Host
updateHeadersByConfig(req.Header, hc.RequestHeaders)
transportOnce.Do(transportInit)
res, err := transport.RoundTrip(req)
rtb, rtbOK := req.Body.(*readTrackingBody)
if err != nil {
@@ -362,77 +345,23 @@ var (
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
)
func getTransport(insecureSkipVerifyP *bool, caFile string) (*http.Transport, error) {
if insecureSkipVerifyP == nil {
insecureSkipVerifyP = backendTLSInsecureSkipVerify
}
insecureSkipVerify := *insecureSkipVerifyP
if caFile == "" {
caFile = *backendTLSCAFile
}
var (
transport *http.Transport
transportOnce sync.Once
)
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = appendTransportKey(bb.B[:0], insecureSkipVerify, caFile)
transportMapLock.Lock()
defer transportMapLock.Unlock()
tr := transportMap[string(bb.B)]
if tr == nil {
trLocal, err := newTransport(insecureSkipVerify, caFile)
if err != nil {
return nil, err
}
transportMap[string(bb.B)] = trLocal
tr = trLocal
}
return tr, nil
}
var transportMap = make(map[string]*http.Transport)
var transportMapLock sync.Mutex
// appendTransportKey appends to dst the marshaled insecureSkipVerify flag
// followed by the marshaled caFile string and returns the extended slice.
//
// The resulting bytes are used by getTransport as the lookup key in transportMap.
func appendTransportKey(dst []byte, insecureSkipVerify bool, caFile string) []byte {
	return encoding.MarshalBytes(
		encoding.MarshalBool(dst, insecureSkipVerify),
		bytesutil.ToUnsafeBytes(caFile),
	)
}
var bbPool bytesutil.ByteBufferPool
func newTransport(insecureSkipVerify bool, caFile string) (*http.Transport, error) {
func transportInit() {
tr := http.DefaultTransport.(*http.Transport).Clone()
tr.ResponseHeaderTimeout = *responseTimeout
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
tr.DisableCompression = true
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
tr.ForceAttemptHTTP2 = false
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
tlsCfg := tr.TLSClientConfig
if tlsCfg == nil {
tlsCfg = &tls.Config{}
tr.TLSClientConfig = tlsCfg
}
if insecureSkipVerify || caFile != "" {
tlsCfg.ClientSessionCache = tls.NewLRUClientSessionCache(0)
tlsCfg.InsecureSkipVerify = insecureSkipVerify
if caFile != "" {
data, err := fs.ReadFileOrHTTP(caFile)
if err != nil {
return nil, fmt.Errorf("cannot read tls_ca_file: %w", err)
}
rootCA := x509.NewCertPool()
if !rootCA.AppendCertsFromPEM(data) {
return nil, fmt.Errorf("cannot parse data read from tls_ca_file %q", caFile)
}
tlsCfg.RootCAs = rootCA
}
}
return tr, nil
transport = tr
}
var (

View File

@@ -6,13 +6,12 @@ import (
"strings"
)
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL {
func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
targetURL := *uiURL
srcPath := dropPrefixParts(requestURI.Path, dropSrcPathPrefixParts)
if strings.HasPrefix(srcPath, "/") {
if strings.HasPrefix(requestURI.Path, "/") {
targetURL.Path = strings.TrimSuffix(targetURL.Path, "/")
}
targetURL.Path += srcPath
targetURL.Path += requestURI.Path
requestParams := requestURI.Query()
// fast path
if len(requestParams) == 0 {
@@ -33,34 +32,18 @@ func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL
return &targetURL
}
// dropPrefixParts removes the given number of leading `/`-delimited parts
// from path and returns the remainder.
//
// A non-positive parts value returns path unchanged. On each step a single
// leading slash (if any) is skipped and everything up to the next slash is
// dropped; the remainder keeps that slash as its first character. An empty
// string is returned as soon as no further slash is found, i.e. when path
// has fewer parts than requested.
func dropPrefixParts(path string, parts int) string {
	for ; parts > 0; parts-- {
		rest := strings.TrimPrefix(path, "/")
		slash := strings.IndexByte(rest, '/')
		if slash < 0 {
			// Nothing is left after the part being dropped.
			return ""
		}
		path = rest[slash:]
	}
	return path
}
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) {
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int) {
for _, e := range ui.URLMaps {
for _, sp := range e.SrcPaths {
if sp.match(u.Path) {
return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts
return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes
}
}
}
if ui.URLPrefix != nil {
return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts
return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes
}
return nil, HeadersConf{}, nil, 0
return nil, HeadersConf{}, nil
}
func normalizeURL(uOrig *url.URL) *url.URL {

View File

@@ -7,91 +7,20 @@ import (
"testing"
)
// TestDropPrefixParts runs dropPrefixParts over a table of inputs covering
// empty paths, paths with and without leading or trailing slashes, and part
// counts from -1 up to 10.
func TestDropPrefixParts(t *testing.T) {
	cases := []struct {
		path   string
		parts  int
		expect string
	}{
		{"", 0, ""},
		{"", 1, ""},
		{"", 10, ""},
		{"foo", 0, "foo"},
		{"foo", -1, "foo"},
		{"foo", 1, ""},

		// parts = 0: the path is returned as is
		{"/foo", 0, "/foo"},
		{"/foo/bar", 0, "/foo/bar"},
		{"/foo/bar/baz", 0, "/foo/bar/baz"},
		{"foo", 0, "foo"},
		{"foo/bar", 0, "foo/bar"},
		{"foo/bar/baz", 0, "foo/bar/baz"},
		{"/foo/", 0, "/foo/"},
		{"/foo/bar/", 0, "/foo/bar/"},
		{"/foo/bar/baz/", 0, "/foo/bar/baz/"},

		// parts = 1
		{"/foo", 1, ""},
		{"/foo/bar", 1, "/bar"},
		{"/foo/bar/baz", 1, "/bar/baz"},
		{"foo", 1, ""},
		{"foo/bar", 1, "/bar"},
		{"foo/bar/baz", 1, "/bar/baz"},
		{"/foo/", 1, "/"},
		{"/foo/bar/", 1, "/bar/"},
		{"/foo/bar/baz/", 1, "/bar/baz/"},

		// parts = 2
		{"/foo", 2, ""},
		{"/foo/bar", 2, ""},
		{"/foo/bar/baz", 2, "/baz"},
		{"foo", 2, ""},
		{"foo/bar", 2, ""},
		{"foo/bar/baz", 2, "/baz"},
		{"/foo/", 2, ""},
		{"/foo/bar/", 2, "/"},
		{"/foo/bar/baz/", 2, "/baz/"},

		// parts = 3
		{"/foo", 3, ""},
		{"/foo/bar", 3, ""},
		{"/foo/bar/baz", 3, ""},
		{"foo", 3, ""},
		{"foo/bar", 3, ""},
		{"foo/bar/baz", 3, ""},
		{"/foo/", 3, ""},
		{"/foo/bar/", 3, ""},
		{"/foo/bar/baz/", 3, "/"},

		// parts = 4
		{"/foo/", 4, ""},
		{"/foo/bar/", 4, ""},
		{"/foo/bar/baz/", 4, ""},
	}
	for _, tc := range cases {
		result := dropPrefixParts(tc.path, tc.parts)
		if result != tc.expect {
			t.Fatalf("unexpected result; got %q; want %q", result, tc.expect)
		}
	}
}
func TestCreateTargetURLSuccess(t *testing.T) {
f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string,
expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) {
f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string, expectedRetryStatusCodes []int) {
t.Helper()
u, err := url.Parse(requestURI)
if err != nil {
t.Fatalf("cannot parse %q: %s", requestURI, err)
}
u = normalizeURL(u)
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
if up == nil {
t.Fatalf("cannot determie backend: %s", err)
}
bu := up.getLeastLoadedBackendURL()
target := mergeURLs(bu.url, u, dropSrcPathPrefixParts)
target := mergeURLs(bu.url, u)
bu.put()
if target.String() != expectedTarget {
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
@@ -103,14 +32,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) {
t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes)
}
if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
}
}
// Simple routing with `url_prefix`
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
}, "", "http://foo.bar/.", "[]", "[]", nil, 0)
}, "", "http://foo.bar/.", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
HeadersConf: HeadersConf{
@@ -119,30 +45,29 @@ func TestCreateTargetURLSuccess(t *testing.T) {
Value: "aaa",
}},
},
RetryStatusCodes: []int{503, 501},
DropSrcPathPrefixParts: 2,
}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2)
RetryStatusCodes: []int{503, 501},
}, "/", "http://foo.bar", `[{"bb" "aaa"}]`, `[]`, []int{503, 501})
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar/federate"),
}, "/", "http://foo.bar/federate", "[]", "[]", nil, 0)
}, "/", "http://foo.bar/federate", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar"),
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0)
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0)
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0)
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("https://sss:3894/x/y"),
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0)
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil)
// Complex routing with `url_map`
ui := &UserInfo{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/vmsingle/api/v1/query"}),
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
HeadersConf: HeadersConf{
RequestHeaders: []Header{
@@ -162,8 +87,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
},
RetryStatusCodes: []int{503, 500, 501},
DropSrcPathPrefixParts: 1,
RetryStatusCodes: []int{503, 500, 501},
},
{
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
@@ -181,12 +105,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
Value: "y",
}},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: 2,
RetryStatusCodes: []int{502},
}
f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1)
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2)
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501})
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil)
f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502})
// Complex routing regexp paths in `url_map`
ui = &UserInfo{
@@ -202,17 +125,17 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
URLPrefix: mustParseURL("http://default-server"),
}
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0)
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0)
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0)
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0)
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil)
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil)
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil)
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil)
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0)
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0)
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil)
}
func TestCreateTargetURLFailure(t *testing.T) {
@@ -223,7 +146,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
t.Fatalf("cannot parse %q: %s", requestURI, err)
}
u = normalizeURL(u)
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
if up != nil {
t.Fatalf("unexpected non-empty up=%#v", up)
}
@@ -236,9 +159,6 @@ func TestCreateTargetURLFailure(t *testing.T) {
if retryStatusCodes != nil {
t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes)
}
if dropSrcPathPrefixParts != 0 {
t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts)
}
}
f(&UserInfo{}, "/foo/bar")
f(&UserInfo{

View File

@@ -316,12 +316,6 @@ Run `vmbackup -help` in order to see all the available options:
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration

View File

@@ -168,7 +168,7 @@ See the docs at https://docs.victoriametrics.com/vmbackup.html .
func newSrcFS() (*fslocal.FS, error) {
if err := snapshot.Validate(*snapshotName); err != nil {
return nil, fmt.Errorf("invalid -snapshotName=%q: %w", *snapshotName, err)
return nil, fmt.Errorf("invalid -snapshotName=%q: %s", *snapshotName, err)
}
snapshotPath := filepath.Join(*storageDataPath, "snapshots", *snapshotName)

View File

@@ -450,12 +450,6 @@ command-line flags:
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration

View File

@@ -1,4 +1,5 @@
//go:build aix || linux || solaris || zos
// +build aix linux solaris zos
package terminal

View File

@@ -1,4 +1,5 @@
//go:build darwin || freebsd || openbsd
// +build darwin freebsd openbsd
package terminal

View File

@@ -1,4 +1,5 @@
//go:build windows
// +build windows
package terminal

View File

@@ -353,12 +353,6 @@ The shortlist of configuration flags include the following:
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration

View File

@@ -211,3 +211,7 @@ func pushAggregateSeries(tss []prompbmarshal.TimeSeries) {
logger.Errorf("cannot flush aggregate series: %s", err)
}
}
// GetAggregators returns the stream aggregators obtained via sasGlobal.Load().
//
// The returned map contains a single entry stored under the "default" key.
func GetAggregators() map[string]*streamaggr.Aggregators {
	aggregators := make(map[string]*streamaggr.Aggregators, 1)
	aggregators["default"] = sasGlobal.Load()
	return aggregators
}

View File

@@ -4,6 +4,7 @@ import (
"embed"
"flag"
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"net/http"
"strings"
"sync/atomic"
@@ -326,6 +327,9 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
procutil.SelfSIGHUP()
w.WriteHeader(http.StatusNoContent)
return true
case "/stream-agg":
streamaggr.WriteHumanReadableState(w, r, vminsertCommon.GetAggregators())
return true
case "/ready":
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
errMsg := fmt.Sprintf("waiting for scrape config to init targets, configs left: %d", rdy)

View File

@@ -113,12 +113,6 @@ i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/q
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
Value for 'Content-Security-Policy' header
-http.header.frameOptions string
Value for 'X-Frame-Options' header
-http.header.hsts string
Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration

View File

@@ -81,7 +81,7 @@ var funcs = func() map[string]*funcInfo {
var m map[string]*funcInfo
if err := json.Unmarshal(funcsJSON, &m); err != nil {
// Do not use logger.Panicf, since it isn't ready yet.
panic(fmt.Errorf("cannot parse funcsJSON: %w", err))
panic(fmt.Errorf("cannot parse funcsJSON: %s", err))
}
return m
}()

View File

@@ -197,7 +197,7 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
return err
}
if err := b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block during export: %w", err)
return fmt.Errorf("cannot unmarshal block during export: %s", err)
}
xb := exportBlockPool.Get().(*exportBlock)
xb.mn = mn
@@ -407,7 +407,7 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, cp *commonPara
return err
}
if err := b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block during export: %w", err)
return fmt.Errorf("cannot unmarshal block during export: %s", err)
}
xb := exportBlockPool.Get().(*exportBlock)
xb.mn = mn

View File

@@ -29,12 +29,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
]
},
"stats":{
{% code
// seriesFetched is string instead of int because of historical reasons.
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
%}
"seriesFetched": "{%dl qs.SeriesFetched %}",
"executionTimeMsec": {%dl qs.ExecutionTimeMsec %}
"seriesFetched": "{%d qs.SeriesFetched %}"
}
{% code
qt.Printf("generate /api/v1/query_range response for series=%d, points=%d", seriesCount, pointsCount)

View File

@@ -60,95 +60,85 @@ func StreamQueryRangeResponse(qw422016 *qt422016.Writer, rs []netstorage.Result,
//line app/vmselect/prometheus/query_range_response.qtpl:28
}
//line app/vmselect/prometheus/query_range_response.qtpl:28
qw422016.N().S(`]},"stats":{`)
//line app/vmselect/prometheus/query_range_response.qtpl:33
// seriesFetched is string instead of int because of historical reasons.
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
qw422016.N().S(`]},"stats":{"seriesFetched": "`)
//line app/vmselect/prometheus/query_range_response.qtpl:32
qw422016.N().D(qs.SeriesFetched)
//line app/vmselect/prometheus/query_range_response.qtpl:32
qw422016.N().S(`"}`)
//line app/vmselect/prometheus/query_range_response.qtpl:35
qw422016.N().S(`"seriesFetched": "`)
//line app/vmselect/prometheus/query_range_response.qtpl:36
qw422016.N().DL(qs.SeriesFetched)
//line app/vmselect/prometheus/query_range_response.qtpl:36
qw422016.N().S(`","executionTimeMsec":`)
//line app/vmselect/prometheus/query_range_response.qtpl:37
qw422016.N().DL(qs.ExecutionTimeMsec)
//line app/vmselect/prometheus/query_range_response.qtpl:37
qw422016.N().S(`}`)
//line app/vmselect/prometheus/query_range_response.qtpl:40
qt.Printf("generate /api/v1/query_range response for series=%d, points=%d", seriesCount, pointsCount)
qtDone()
//line app/vmselect/prometheus/query_range_response.qtpl:43
//line app/vmselect/prometheus/query_range_response.qtpl:38
streamdumpQueryTrace(qw422016, qt)
//line app/vmselect/prometheus/query_range_response.qtpl:43
//line app/vmselect/prometheus/query_range_response.qtpl:38
qw422016.N().S(`}`)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
}
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
func WriteQueryRangeResponse(qq422016 qtio422016.Writer, rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) {
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
StreamQueryRangeResponse(qw422016, rs, qt, qtDone, qs)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
}
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
func QueryRangeResponse(rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) string {
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
WriteQueryRangeResponse(qb422016, rs, qt, qtDone, qs)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
return qs422016
//line app/vmselect/prometheus/query_range_response.qtpl:45
//line app/vmselect/prometheus/query_range_response.qtpl:40
}
//line app/vmselect/prometheus/query_range_response.qtpl:47
//line app/vmselect/prometheus/query_range_response.qtpl:42
func streamqueryRangeLine(qw422016 *qt422016.Writer, r *netstorage.Result) {
//line app/vmselect/prometheus/query_range_response.qtpl:47
//line app/vmselect/prometheus/query_range_response.qtpl:42
qw422016.N().S(`{"metric":`)
//line app/vmselect/prometheus/query_range_response.qtpl:49
//line app/vmselect/prometheus/query_range_response.qtpl:44
streammetricNameObject(qw422016, &r.MetricName)
//line app/vmselect/prometheus/query_range_response.qtpl:49
//line app/vmselect/prometheus/query_range_response.qtpl:44
qw422016.N().S(`,"values":`)
//line app/vmselect/prometheus/query_range_response.qtpl:50
//line app/vmselect/prometheus/query_range_response.qtpl:45
streamvaluesWithTimestamps(qw422016, r.Values, r.Timestamps)
//line app/vmselect/prometheus/query_range_response.qtpl:50
//line app/vmselect/prometheus/query_range_response.qtpl:45
qw422016.N().S(`}`)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
}
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
func writequeryRangeLine(qq422016 qtio422016.Writer, r *netstorage.Result) {
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
streamqueryRangeLine(qw422016, r)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
}
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
func queryRangeLine(r *netstorage.Result) string {
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
writequeryRangeLine(qb422016, r)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
return qs422016
//line app/vmselect/prometheus/query_range_response.qtpl:52
//line app/vmselect/prometheus/query_range_response.qtpl:47
}

View File

@@ -31,12 +31,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
]
},
"stats":{
{% code
// seriesFetched is string instead of int because of historical reasons.
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
%}
"seriesFetched": "{%dl qs.SeriesFetched %}",
"executionTimeMsec": {%dl qs.ExecutionTimeMsec %}
"seriesFetched": "{%d qs.SeriesFetched %}"
}
{% code
qt.Printf("generate /api/v1/query response for series=%d", seriesCount)

View File

@@ -70,54 +70,44 @@ func StreamQueryResponse(qw422016 *qt422016.Writer, rs []netstorage.Result, qt *
//line app/vmselect/prometheus/query_response.qtpl:30
}
//line app/vmselect/prometheus/query_response.qtpl:30
qw422016.N().S(`]},"stats":{`)
//line app/vmselect/prometheus/query_response.qtpl:35
// seriesFetched is string instead of int because of historical reasons.
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
qw422016.N().S(`]},"stats":{"seriesFetched": "`)
//line app/vmselect/prometheus/query_response.qtpl:34
qw422016.N().D(qs.SeriesFetched)
//line app/vmselect/prometheus/query_response.qtpl:34
qw422016.N().S(`"}`)
//line app/vmselect/prometheus/query_response.qtpl:37
qw422016.N().S(`"seriesFetched": "`)
//line app/vmselect/prometheus/query_response.qtpl:38
qw422016.N().DL(qs.SeriesFetched)
//line app/vmselect/prometheus/query_response.qtpl:38
qw422016.N().S(`","executionTimeMsec":`)
//line app/vmselect/prometheus/query_response.qtpl:39
qw422016.N().DL(qs.ExecutionTimeMsec)
//line app/vmselect/prometheus/query_response.qtpl:39
qw422016.N().S(`}`)
//line app/vmselect/prometheus/query_response.qtpl:42
qt.Printf("generate /api/v1/query response for series=%d", seriesCount)
qtDone()
//line app/vmselect/prometheus/query_response.qtpl:45
//line app/vmselect/prometheus/query_response.qtpl:40
streamdumpQueryTrace(qw422016, qt)
//line app/vmselect/prometheus/query_response.qtpl:45
//line app/vmselect/prometheus/query_response.qtpl:40
qw422016.N().S(`}`)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
}
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
func WriteQueryResponse(qq422016 qtio422016.Writer, rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) {
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
StreamQueryResponse(qw422016, rs, qt, qtDone, qs)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
}
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
func QueryResponse(rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) string {
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
WriteQueryResponse(qb422016, rs, qt, qtDone, qs)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
return qs422016
//line app/vmselect/prometheus/query_response.qtpl:47
//line app/vmselect/prometheus/query_response.qtpl:42
}

View File

@@ -38,7 +38,6 @@ var aggrFuncs = map[string]aggrFunc{
"median": aggrFuncMedian,
"min": newAggrFunc(aggrFuncMin),
"mode": newAggrFunc(aggrFuncMode),
"outliers_iqr": aggrFuncOutliersIQR,
"outliers_mad": aggrFuncOutliersMAD,
"outliersk": aggrFuncOutliersK,
"quantile": aggrFuncQuantile,
@@ -945,58 +944,6 @@ func aggrFuncMAD(tss []*timeseries) []*timeseries {
return tss[:1]
}
// aggrFuncOutliersIQR implements the `outliers_iqr` aggregate function.
//
// It requires exactly one arg and keeps only the time series, which have
// at least a single point outside the per-point bounds returned
// by getPerPointIQRBounds for the series passed to the callback.
func aggrFuncOutliersIQR(afa *aggrFuncArg) ([]*timeseries, error) {
	if err := expectTransformArgsNum(afa.args, 1); err != nil {
		return nil, err
	}
	keepOutliers := func(series []*timeseries, _ *metricsql.ModifierExpr) []*timeseries {
		// Per-point lower and upper bounds are calculated across all the series.
		// See Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
		lowerBounds, upperBounds := getPerPointIQRBounds(series)

		// hasOutlier reports whether ts contains a point above the upper bound
		// or below the lower bound. NaN points never match, since both comparisons
		// are false for NaN.
		hasOutlier := func(ts *timeseries) bool {
			for idx, v := range ts.Values {
				if v > upperBounds[idx] || v < lowerBounds[idx] {
					return true
				}
			}
			return false
		}

		// Filter the series in place in order to avoid extra allocations.
		kept := series[:0]
		for _, ts := range series {
			if hasOutlier(ts) {
				kept = append(kept, ts)
			}
		}
		return kept
	}
	return aggrFuncExt(keepOutliers, afa.args[0], &afa.ae.Modifier, afa.ae.Limit, true)
}
// getPerPointIQRBounds returns lower and upper outlier bounds for every point across tss.
//
// For the point i the bounds are q25 - 1.5*(q75-q25) and q75 + 1.5*(q75-q25),
// where q25 and q75 are the quantiles from iqrPhis calculated over non-NaN values
// of all the tss at the point i.
//
// The number of points is taken from tss[0]; the remaining series are indexed
// with the same point index. Nil slices are returned for empty tss.
func getPerPointIQRBounds(tss []*timeseries) ([]float64, []float64) {
	if len(tss) == 0 {
		return nil, nil
	}
	pointsLen := len(tss[0].Values)
	// values and qs are scratch buffers, which are re-used across points.
	values := make([]float64, 0, len(tss))
	var qs []float64
	lower := make([]float64, pointsLen)
	upper := make([]float64, pointsLen)
	for i := 0; i < pointsLen; i++ {
		values = values[:0]
		for _, ts := range tss {
			v := ts.Values[i]
			if !math.IsNaN(v) {
				values = append(values, v)
			}
		}
		// Use `=` instead of `:=` here. The short variable declaration would shadow
		// the outer qs, so the buffer would be never re-used between iterations.
		qs = quantiles(qs[:0], iqrPhis, values)
		iqr := 1.5 * (qs[1] - qs[0])
		lower[i] = qs[0] - iqr
		upper[i] = qs[1] + iqr
	}
	return lower, upper
}
// iqrPhis contains the phi values (0.25 and 0.75) passed to quantiles()
// by getPerPointIQRBounds when calculating interquartile range bounds.
var iqrPhis = []float64{0.25, 0.75}
func aggrFuncOutliersMAD(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 2); err != nil {

View File

@@ -79,13 +79,6 @@ type incrementalAggrFuncContext struct {
callbacks *incrementalAggrFuncCallbacks
}
// resetState replaces the map of every byWorkerID entry with a fresh empty map,
// which is pre-sized to the number of items the previous map contained.
func (iafc *incrementalAggrFuncContext) resetState() {
	shards := iafc.byWorkerID
	for idx := range shards {
		sizeHint := len(shards[idx].m)
		shards[idx].m = make(map[string]*incrementalAggrContext, sizeHint)
	}
}
func newIncrementalAggrFuncContext(ae *metricsql.AggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
return &incrementalAggrFuncContext{
ae: ae,
@@ -161,8 +154,6 @@ func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
finalizeAggrFunc(iac)
tss = append(tss, iac.ts)
}
// reset iafc state, so it could be re-used
iafc.resetState()
return tss
}

View File

@@ -9,7 +9,6 @@ import (
"strings"
"sync"
"sync/atomic"
"time"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
@@ -17,13 +16,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
)
@@ -36,14 +33,11 @@ var (
"Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated "+
"as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . "+
"See also -search.logQueryMemoryUsage")
logQueryMemoryUsage = flagutil.NewBytes("search.logQueryMemoryUsage", 0, "Log query and increment vm_memory_intensive_queries_total metric each time "+
"the query requires more memory than specified by this flag. "+
logQueryMemoryUsage = flagutil.NewBytes("search.logQueryMemoryUsage", 0, "Log queries, which require more memory than specified by this flag. "+
"This may help detecting and optimizing heavy queries. Query logging is disabled by default. "+
"See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery")
noStaleMarkers = flag.Bool("search.noStaleMarkers", false, "Set this flag to true if the database doesn't contain Prometheus stale markers, "+
"so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets")
minWindowForInstantRollupOptimization = flagutil.NewDuration("search.minWindowForInstantRollupOptimization", "3h", "Enable cache-based optimization for repeated queries "+
"to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value")
)
// The minimum number of points per timeseries for enabling time rounding.
@@ -66,7 +60,7 @@ func ValidateMaxPointsPerSeries(start, end, step int64, maxPoints int) error {
// AdjustStartEnd adjusts start and end values, so response caching may be enabled.
//
// See EvalConfig.mayCache() for details.
// See EvalConfig.mayCache for details.
func AdjustStartEnd(start, end, step int64) (int64, int64) {
if *disableCache {
// Do not adjust start and end values when cache is disabled.
@@ -140,7 +134,8 @@ type EvalConfig struct {
// QueryStats contains various stats for the currently executed query.
//
// The caller must initialize QueryStats, otherwise it isn't collected.
// The caller must initialize the QueryStats if it needs the stats.
// Otherwise the stats isn't collected.
QueryStats *QueryStats
timestamps []int64
@@ -170,24 +165,13 @@ func copyEvalConfig(src *EvalConfig) *EvalConfig {
// QueryStats contains various stats for the query.
type QueryStats struct {
// SeriesFetched contains the number of series fetched from storage during the query evaluation.
SeriesFetched int64
// ExecutionTimeMsec contains the number of milliseconds the query took to execute.
ExecutionTimeMsec int64
SeriesFetched int
}
func (qs *QueryStats) addSeriesFetched(n int) {
if qs == nil {
return
if qs != nil {
qs.SeriesFetched += n
}
atomic.AddInt64(&qs.SeriesFetched, int64(n))
}
func (qs *QueryStats) addExecutionTimeMsec(startTime time.Time) {
if qs == nil {
return
}
d := time.Since(startTime).Milliseconds()
atomic.AddInt64(&qs.ExecutionTimeMsec, d)
}
func (ec *EvalConfig) validate() {
@@ -206,11 +190,6 @@ func (ec *EvalConfig) mayCache() bool {
if !ec.MayCache {
return false
}
if ec.Start == ec.End {
// There is no need in aligning start and end to step for instant query
// in order to cache its results.
return true
}
if ec.Start%ec.Step != 0 {
return false
}
@@ -260,7 +239,7 @@ func getTimestamps(start, end, step int64, maxPointsPerSeries int) []int64 {
func evalExpr(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
if qt.Enabled() {
query := string(e.AppendString(nil))
query = stringsutil.LimitStringLen(query, 300)
query = bytesutil.LimitStringLen(query, 300)
mayCache := ec.mayCache()
qt = qt.NewChild("eval: query=%s, timeRange=%s, step=%d, mayCache=%v", query, ec.timeRangeString(), ec.Step, mayCache)
}
@@ -1059,595 +1038,46 @@ func removeNanValues(dstValues []float64, dstTimestamps []int64, values []float6
return dstValues, dstTimestamps
}
// evalInstantRollup evaluates instant rollup where ec.Start == ec.End.
//
// It panics if ec.Start != ec.End.
//
// For a subset of rollup functions the result at the requested timestamp is derived
// from a previously cached result at an earlier timestamp plus two rollups over a much
// shorter window (the offset between the cached and the requested timestamps).
// The function falls back to the plain evaluation via evalRollupFuncNoCache when
// caching is disabled, when the window is smaller than minWindowForInstantRollupOptimization,
// when the function isn't supported by the optimization or when the optimization
// cannot be applied to the fetched data.
func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window int64) ([]*timeseries, error) {
if ec.Start != ec.End {
logger.Panicf("BUG: evalInstantRollup cannot be called on non-empty time range; got %s", ec.timeRangeString())
}
timestamp := ec.Start
if qt.Enabled() {
qt = qt.NewChild("instant rollup %s; time=%s, window=%d", expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
defer qt.Done()
}
// evalAt evaluates the rollup at the given timestamp with the given lookbehind window
// on a copy of ec, bypassing the instant values cache.
evalAt := func(qt *querytracer.Tracer, timestamp, window int64) ([]*timeseries, error) {
ecCopy := copyEvalConfig(ec)
ecCopy.Start = timestamp
ecCopy.End = timestamp
pointsPerSeries := int64(1)
return evalRollupFuncNoCache(qt, ecCopy, funcName, rf, expr, me, iafc, window, pointsPerSeries)
}
// tooBigOffset reports whether offset reaches the limit, which is the smaller of
// half of the window and 30 minutes (1800*1000 milliseconds).
tooBigOffset := func(offset int64) bool {
maxOffset := window / 2
if maxOffset > 1800*1000 {
maxOffset = 1800 * 1000
}
return offset >= maxOffset
}
// deleteCachedSeries drops the cached instant values for the current expr, window, step and enforced tag filters.
deleteCachedSeries := func(qt *querytracer.Tracer) {
rollupResultCacheV.DeleteInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss)
}
// getCachedSeries returns the series to use as the cached part of the calculation
// together with the offset between the requested timestamp and the timestamp of the returned series.
//
// Zero offset means the returned series already hold the result for the requested timestamp.
getCachedSeries := func(qt *querytracer.Tracer) ([]*timeseries, int64, error) {
again:
offset := int64(0)
tssCached := rollupResultCacheV.GetInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss)
ec.QueryStats.addSeriesFetched(len(tssCached))
if len(tssCached) == 0 {
// Cache miss. Re-populate the missing data.
start := int64(fasttime.UnixTimestamp()*1000) - cacheTimestampOffset.Milliseconds()
offset = timestamp - start
if offset < 0 {
// The requested timestamp is older than the default cache timestamp, so calculate the rollup exactly at it.
start = timestamp
offset = 0
}
if tooBigOffset(offset) {
qt.Printf("cannot apply instant rollup optimization because the -search.cacheTimestampOffset=%s is too big "+
"for the requested time=%s and window=%d", cacheTimestampOffset, storage.TimestampToHumanReadableFormat(timestamp), window)
tss, err := evalAt(qt, timestamp, window)
return tss, 0, err
}
qt.Printf("calculating the rollup at time=%s, because it is missing in the cache", storage.TimestampToHumanReadableFormat(start))
tss, err := evalAt(qt, start, window)
if err != nil {
return nil, 0, err
}
if hasDuplicateSeries(tss) {
qt.Printf("cannot apply instant rollup optimization because the result contains duplicate series")
tss, err := evalAt(qt, timestamp, window)
return tss, 0, err
}
rollupResultCacheV.PutInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss, tss)
return tss, offset, nil
}
// Cache hit. Verify whether it is OK to use the cached data.
offset = timestamp - tssCached[0].Timestamps[0]
if offset < 0 {
qt.Printf("do not apply instant rollup optimization because the cached values have bigger timestamp=%s than the requested one=%s",
storage.TimestampToHumanReadableFormat(tssCached[0].Timestamps[0]), storage.TimestampToHumanReadableFormat(timestamp))
// Delete the outdated cached values, so the cache could be re-populated with newer values.
deleteCachedSeries(qt)
goto again
}
if tooBigOffset(offset) {
qt.Printf("do not apply instant rollup optimization because the offset=%d between the requested timestamp "+
"and the cached values is too big comparing to window=%d", offset, window)
// Delete the outdated cached values, so the cache could be re-populated with newer values.
deleteCachedSeries(qt)
goto again
}
return tssCached, offset, nil
}
if !ec.mayCache() {
qt.Printf("do not apply instant rollup optimization because of disabled cache")
return evalAt(qt, timestamp, window)
}
if window < minWindowForInstantRollupOptimization.Milliseconds() {
qt.Printf("do not apply instant rollup optimization because of too small window=%d; must be equal or bigger than %d",
window, minWindowForInstantRollupOptimization.Milliseconds())
return evalAt(qt, timestamp, window)
}
switch funcName {
case "avg_over_time":
if iafc != nil {
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
return evalAt(qt, timestamp, window)
}
qt.Printf("optimized calculation for instant rollup avg_over_time(m[d]) as (sum_over_time(m[d]) / count_over_time(m[d]))")
fe := expr.(*metricsql.FuncExpr)
// Shallow copies of fe are renamed, so the original expression remains unchanged.
feSum := *fe
feSum.Name = "sum_over_time"
feCount := *fe
feCount.Name = "count_over_time"
be := &metricsql.BinaryOpExpr{
Op: "/",
KeepMetricNames: fe.KeepMetricNames,
Left: &feSum,
Right: &feCount,
}
return evalExpr(qt, ec, be)
case "rate":
if iafc != nil {
if strings.ToLower(iafc.ae.Name) != "sum" {
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
return evalAt(qt, timestamp, window)
}
qt.Printf("optimized calculation for sum(rate(m[d])) as (sum(increase(m[d])) / d)")
afe := expr.(*metricsql.AggrFuncExpr)
fe := afe.Args[0].(*metricsql.FuncExpr)
feIncrease := *fe
feIncrease.Name = "increase"
re := fe.Args[0].(*metricsql.RollupExpr)
d := re.Window.Duration(ec.Step)
if d == 0 {
// Fall back to step when the window duration is zero.
d = ec.Step
}
afeIncrease := *afe
afeIncrease.Args = []metricsql.Expr{&feIncrease}
be := &metricsql.BinaryOpExpr{
Op: "/",
KeepMetricNames: true,
Left: &afeIncrease,
Right: &metricsql.NumberExpr{
// d is in milliseconds, while the divisor is expressed in seconds.
N: float64(d) / 1000,
},
}
return evalExpr(qt, ec, be)
}
qt.Printf("optimized calculation for instant rollup rate(m[d]) as (increase(m[d]) / d)")
fe := expr.(*metricsql.FuncExpr)
feIncrease := *fe
feIncrease.Name = "increase"
re := fe.Args[0].(*metricsql.RollupExpr)
d := re.Window.Duration(ec.Step)
if d == 0 {
// Fall back to step when the window duration is zero.
d = ec.Step
}
be := &metricsql.BinaryOpExpr{
Op: "/",
KeepMetricNames: fe.KeepMetricNames,
Left: &feIncrease,
Right: &metricsql.NumberExpr{
// d is in milliseconds, while the divisor is expressed in seconds.
N: float64(d) / 1000,
},
}
return evalExpr(qt, ec, be)
case "max_over_time":
if iafc != nil {
if strings.ToLower(iafc.ae.Name) != "max" {
qt.Printf("do not apply instant rollup optimization for non-max incremental aggregate %s()", iafc.ae.Name)
return evalAt(qt, timestamp, window)
}
}
// Calculate
//
// max_over_time(m[window] @ timestamp)
//
// as the maximum of
//
// - max_over_time(m[window] @ (timestamp-offset))
// - max_over_time(m[offset] @ timestamp)
//
// if max_over_time(m[offset] @ (timestamp-window)) < max_over_time(m[window] @ (timestamp-offset))
// otherwise do not apply the optimization
//
// where
//
// - max_over_time(m[window] @ (timestamp-offset)) is obtained from cache
// - max_over_time(m[offset] @ timestamp) and max_over_time(m[offset] @ (timestamp-window)) are calculated from the storage
// These rollups are calculated faster than max_over_time(m[window]) because offset is smaller than window.
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
defer qtChild.Done()
tssCached, offset, err := getCachedSeries(qtChild)
if err != nil {
return nil, err
}
if offset == 0 {
// The series returned by getCachedSeries already hold the result for the requested timestamp.
return tssCached, nil
}
// Calculate max_over_time(m[offset] @ timestamp)
tssStart, err := evalAt(qtChild, timestamp, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssStart) {
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate max_over_time(m[offset] @ (timestamp - window))
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssEnd) {
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate the result
tss, ok := getMaxInstantValues(qtChild, tssCached, tssStart, tssEnd)
if !ok {
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains bigger values than tssCached")
deleteCachedSeries(qtChild)
return evalAt(qt, timestamp, window)
}
return tss, nil
case "min_over_time":
if iafc != nil {
if strings.ToLower(iafc.ae.Name) != "min" {
qt.Printf("do not apply instant rollup optimization for non-min incremental aggregate %s()", iafc.ae.Name)
return evalAt(qt, timestamp, window)
}
}
// Calculate
//
// min_over_time(m[window] @ timestamp)
//
// as the minimum of
//
// - min_over_time(m[window] @ (timestamp-offset))
// - min_over_time(m[offset] @ timestamp)
//
// if min_over_time(m[offset] @ (timestamp-window)) > min_over_time(m[window] @ (timestamp-offset))
// otherwise do not apply the optimization
//
// where
//
// - min_over_time(m[window] @ (timestamp-offset)) is obtained from cache
// - min_over_time(m[offset] @ timestamp) and min_over_time(m[offset] @ (timestamp-window)) are calculated from the storage
// These rollups are calculated faster than min_over_time(m[window]) because offset is smaller than window.
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
defer qtChild.Done()
tssCached, offset, err := getCachedSeries(qtChild)
if err != nil {
return nil, err
}
if offset == 0 {
// The series returned by getCachedSeries already hold the result for the requested timestamp.
return tssCached, nil
}
// Calculate min_over_time(m[offset] @ timestamp)
tssStart, err := evalAt(qtChild, timestamp, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssStart) {
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate min_over_time(m[offset] @ (timestamp - window))
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssEnd) {
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate the result
tss, ok := getMinInstantValues(qtChild, tssCached, tssStart, tssEnd)
if !ok {
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains smaller values than tssCached")
deleteCachedSeries(qtChild)
return evalAt(qt, timestamp, window)
}
return tss, nil
case
"count_eq_over_time",
"count_gt_over_time",
"count_le_over_time",
"count_ne_over_time",
"count_over_time",
"increase",
"increase_pure",
"sum_over_time":
if iafc != nil && strings.ToLower(iafc.ae.Name) != "sum" {
qt.Printf("do not apply instant rollup optimization for non-sum incremental aggregate %s()", iafc.ae.Name)
return evalAt(qt, timestamp, window)
}
// Calculate
//
// rf(m[window] @ timestamp)
//
// as
//
// rf(m[window] @ (timestamp-offset)) + rf(m[offset] @ timestamp) - rf(m[offset] @ (timestamp-window))
//
// where
//
// - rf is count_over_time, sum_over_time or increase
// - rf(m[window] @ (timestamp-offset)) is obtained from cache
// - rf(m[offset] @ timestamp) and rf(m[offset] @ (timestamp-window)) are calculated from the storage
// These rollups are calculated faster than rf(m[window]) because offset is smaller than window.
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
defer qtChild.Done()
tssCached, offset, err := getCachedSeries(qtChild)
if err != nil {
return nil, err
}
if offset == 0 {
// The series returned by getCachedSeries already hold the result for the requested timestamp.
return tssCached, nil
}
// Calculate rf(m[offset] @ timestamp)
tssStart, err := evalAt(qtChild, timestamp, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssStart) {
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate rf(m[offset] @ (timestamp - window))
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
if err != nil {
return nil, err
}
if hasDuplicateSeries(tssEnd) {
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
return evalAt(qtChild, timestamp, window)
}
// Calculate the result
tss := getSumInstantValues(qtChild, tssCached, tssStart, tssEnd)
return tss, nil
default:
qt.Printf("instant rollup optimization isn't implemented for %s()", funcName)
return evalAt(qt, timestamp, window)
}
}
// hasDuplicateSeries returns true if at least two series in tss share the same key
// produced by marshalMetricNameSorted() for their metric names.
func hasDuplicateSeries(tss []*timeseries) bool {
	if len(tss) < 2 {
		// Zero or one series cannot contain duplicates.
		return false
	}
	seen := make(map[string]struct{}, len(tss))
	buf := bbPool.Get()
	defer bbPool.Put(buf)
	for i := range tss {
		buf.B = marshalMetricNameSorted(buf.B[:0], &tss[i].MetricName)
		key := string(buf.B)
		if _, dup := seen[key]; dup {
			return true
		}
		seen[key] = struct{}{}
	}
	return false
}
// getMinInstantValues merges tssCached, tssStart and tssEnd via getMinMaxInstantValues
// using the minimum as the merge function.
//
// It returns the merged series and the ok flag reported by getMinMaxInstantValues.
func getMinInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) ([]*timeseries, bool) {
	qt = qt.NewChild("calculate the minimum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
	defer qt.Done()

	// pickSmaller returns a only if it is strictly smaller than b; otherwise it returns b.
	pickSmaller := func(a, b float64) float64 {
		result := b
		if a < b {
			result = a
		}
		return result
	}
	merged, ok := getMinMaxInstantValues(tssCached, tssStart, tssEnd, pickSmaller)
	qt.Printf("resulting series=%d; ok=%v", len(merged), ok)
	return merged, ok
}
// getMaxInstantValues merges tssCached, tssStart and tssEnd via getMinMaxInstantValues
// using the maximum as the merge function.
//
// It returns the merged series and the ok flag reported by getMinMaxInstantValues.
func getMaxInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) ([]*timeseries, bool) {
	qt = qt.NewChild("calculate the maximum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
	defer qt.Done()

	// getMax returns a only if it is strictly bigger than b; otherwise it returns b.
	getMax := func(a, b float64) float64 {
		if a > b {
			return a
		}
		return b
	}
	tss, ok := getMinMaxInstantValues(tssCached, tssStart, tssEnd, getMax)
	// Put the ok flag into the trace in the same way as getMinInstantValues does,
	// so a rejected merge is visible in the trace.
	qt.Printf("resulting series=%d; ok=%v", len(tss), ok)
	return tss, ok
}
// getMinMaxInstantValues merges instant values from tssCached and tssStart with the given f
// and verifies the merged result against tssEnd.
//
// f must return either a or b - it is expected to be a min or max selector.
//
// Every series in tssCached, tssStart and tssEnd must contain a single value and a single timestamp.
// The function panics if tssCached or tssStart contain duplicate series.
//
// Values of the series from tssCached may be updated in place.
//
// It returns (nil, false) if the merged result cannot be trusted; see the check over tssEnd below.
// The order of the returned series is unspecified, since they are collected from a map.
func getMinMaxInstantValues(tssCached, tssStart, tssEnd []*timeseries, f func(a, b float64) float64) ([]*timeseries, bool) {
assertInstantValues(tssCached)
assertInstantValues(tssStart)
assertInstantValues(tssEnd)
bb := bbPool.Get()
defer bbPool.Put(bb)
// m holds the resulting series keyed by the marshaled metric name.
m := make(map[string]*timeseries, len(tssCached))
for _, ts := range tssCached {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if _, ok := m[string(bb.B)]; ok {
logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
}
m[string(bb.B)] = ts
}
// mStart keeps the series from tssStart for the verification against tssEnd below.
mStart := make(map[string]*timeseries, len(tssStart))
for _, ts := range tssStart {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if _, ok := mStart[string(bb.B)]; ok {
logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
}
mStart[string(bb.B)] = ts
tsCached := m[string(bb.B)]
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) {
if !math.IsNaN(ts.Values[0]) {
// Both values exist - merge them into the cached series in place.
tsCached.Values[0] = f(ts.Values[0], tsCached.Values[0])
}
} else {
// The cached series is missing or holds NaN - use the series from tssStart instead.
m[string(bb.B)] = ts
}
}
for _, ts := range tssEnd {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
tsCached := m[string(bb.B)]
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) && !math.IsNaN(ts.Values[0]) {
// f selects the value from tssEnd over the merged value (or they are equal).
// NOTE(review): presumably this means the merged value may originate from the samples,
// which are already outside the requested window - confirm.
if ts.Values[0] == f(ts.Values[0], tsCached.Values[0]) {
tsStart := mStart[string(bb.B)]
// The merged value is accepted only if the corresponding series from tssStart exists,
// isn't NaN and f selects its value over the value from tssEnd.
if tsStart == nil || math.IsNaN(tsStart.Values[0]) || tsStart.Values[0] != f(ts.Values[0], tsStart.Values[0]) {
return nil, false
}
}
}
}
rvs := make([]*timeseries, 0, len(m))
for _, ts := range m {
rvs = append(rvs, ts)
}
return rvs, true
}
// getSumInstantValues returns tssCached + tssStart - tssEnd calculated per series.
//
// Series are matched by the key produced by marshalMetricNameSorted() for their metric names.
// Every input series must hold a single value and a single timestamp.
// The function panics on duplicate series in tssCached.
// Values of the series from tssCached may be updated in place.
func getSumInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) []*timeseries {
	qt = qt.NewChild("calculate the sum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
	defer qt.Done()

	assertInstantValues(tssCached)
	assertInstantValues(tssStart)
	assertInstantValues(tssEnd)

	byName := make(map[string]*timeseries, len(tssCached))
	buf := bbPool.Get()
	defer bbPool.Put(buf)

	// Register the cached series.
	for _, ts := range tssCached {
		buf.B = marshalMetricNameSorted(buf.B[:0], &ts.MetricName)
		key := string(buf.B)
		if _, dup := byName[key]; dup {
			logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
		}
		byName[key] = ts
	}

	// Add values from tssStart.
	for _, ts := range tssStart {
		buf.B = marshalMetricNameSorted(buf.B[:0], &ts.MetricName)
		key := string(buf.B)
		acc := byName[key]
		if acc == nil || math.IsNaN(acc.Values[0]) {
			// There is no usable cached value - take the series from tssStart as is.
			byName[key] = ts
			continue
		}
		if v := ts.Values[0]; !math.IsNaN(v) {
			acc.Values[0] += v
		}
	}

	// Subtract values from tssEnd.
	for _, ts := range tssEnd {
		buf.B = marshalMetricNameSorted(buf.B[:0], &ts.MetricName)
		acc := byName[string(buf.B)]
		if acc == nil {
			continue
		}
		if math.IsNaN(acc.Values[0]) || math.IsNaN(ts.Values[0]) {
			continue
		}
		acc.Values[0] -= ts.Values[0]
	}

	result := make([]*timeseries, 0, len(byName))
	for _, ts := range byName {
		result = append(result, ts)
	}
	qt.Printf("resulting series=%d", len(result))
	return result
}
// assertInstantValues panics if any series in tss doesn't hold
// exactly one value and exactly one timestamp.
func assertInstantValues(tss []*timeseries) {
	for i := range tss {
		ts := tss[i]
		if n := len(ts.Values); n != 1 {
			logger.Panicf("BUG: instant series must contain a single value; got %d values", n)
		}
		if n := len(ts.Timestamps); n != 1 {
			logger.Panicf("BUG: instant series must contain a single timestamp; got %d timestamps", n)
		}
	}
}
// Counters exposed as metrics for the rollup result cache and for memory-intensive queries.
var (
// rollupResultCacheFullHits is incremented when the requested time range is fully covered by the cached result.
rollupResultCacheFullHits = metrics.NewCounter(`vm_rollup_result_cache_full_hits_total`)
// rollupResultCachePartialHits is incremented when only a part of the requested time range is covered by the cached result.
rollupResultCachePartialHits = metrics.NewCounter(`vm_rollup_result_cache_partial_hits_total`)
// rollupResultCacheMiss is incremented when the cache doesn't cover any part of the requested time range.
rollupResultCacheMiss = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
// memoryIntensiveQueries is incremented when the estimated memory for the rollup exceeds -search.logQueryMemoryUsage.
memoryIntensiveQueries = metrics.NewCounter(`vm_memory_intensive_queries_total`)
)
func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowExpr *metricsql.DurationExpr) ([]*timeseries, error) {
var rollupMemorySize int64
window, err := windowExpr.NonNegativeDuration(ec.Step)
if err != nil {
return nil, fmt.Errorf("cannot parse lookbehind window in square brackets at %s: %w", expr.AppendString(nil), err)
}
if qt.Enabled() {
qt = qt.NewChild("rollup %s(): timeRange=%s, step=%d, window=%d", funcName, ec.timeRangeString(), ec.Step, window)
defer func() {
qt.Donef("neededMemoryBytes=%d", rollupMemorySize)
}()
}
if me.IsEmpty() {
return evalNumber(ec, nan), nil
}
if ec.Start == ec.End {
rvs, err := evalInstantRollup(qt, ec, funcName, rf, expr, me, iafc, window)
if err != nil {
err = &UserReadableError{
Err: err,
}
return nil, err
}
return rvs, nil
}
// Search for partial results in cache.
tssCached, start := rollupResultCacheV.GetSeries(qt, ec, expr, window)
ec.QueryStats.addSeriesFetched(len(tssCached))
tssCached, start := rollupResultCacheV.Get(qt, ec, expr, window)
if start > ec.End {
qt.Printf("the result is fully cached")
// The result is fully cached.
rollupResultCacheFullHits.Inc()
return tssCached, nil
}
if start > ec.Start {
qt.Printf("partial cache hit")
rollupResultCachePartialHits.Inc()
} else {
qt.Printf("cache miss")
rollupResultCacheMiss.Inc()
}
ecCopy := copyEvalConfig(ec)
ecCopy.Start = start
pointsPerSeries := 1 + (ec.End-ec.Start)/ec.Step
tss, err := evalRollupFuncNoCache(qt, ecCopy, funcName, rf, expr, me, iafc, window, pointsPerSeries)
if err != nil {
err = &UserReadableError{
Err: err,
}
return nil, err
}
rvs, err := mergeTimeseries(qt, tssCached, tss, start, ec)
if err != nil {
return nil, fmt.Errorf("cannot merge series: %w", err)
}
if tss != nil {
rollupResultCacheV.PutSeries(qt, ec, expr, window, tss)
}
return rvs, nil
}
// evalRollupFuncNoCache calculates the given rf with the given lookbehind window.
//
// pointsPerSeries is used only for estimating the needed memory for query processing
func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window, pointsPerSeries int64) ([]*timeseries, error) {
if qt.Enabled() {
qt = qt.NewChild("rollup %s: timeRange=%s, step=%d, window=%d", expr.AppendString(nil), ec.timeRangeString(), ec.Step, window)
defer qt.Done()
}
if window < 0 {
return nil, nil
}
// Obtain rollup configs before fetching data from db, so type errors could be caught earlier.
sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step, ec.MaxPointsPerSeries)
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, ec.Start, ec.End, ec.Step, ec.MaxPointsPerSeries, window, ec.LookbackDelta, sharedTimestamps)
// Obtain rollup configs before fetching data from db,
// so type errors can be caught earlier.
sharedTimestamps := getTimestamps(start, ec.End, ec.Step, ec.MaxPointsPerSeries)
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, start, ec.End, ec.Step, ec.MaxPointsPerSeries, window, ec.LookbackDelta, sharedTimestamps)
if err != nil {
return nil, err
}
@@ -1655,10 +1085,7 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
// Fetch the remaining part of the result.
tfss := searchutils.ToTagFilterss(me.LabelFilterss)
tfss = searchutils.JoinTagFilterss(tfss, ec.EnforcedTagFilterss)
minTimestamp := ec.Start
if needSilenceIntervalForRollupFunc(funcName) {
minTimestamp -= maxSilenceInterval
}
minTimestamp := start - maxSilenceInterval
if window > ec.Step {
minTimestamp -= window
} else {
@@ -1670,16 +1097,21 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
sq := storage.NewSearchQuery(minTimestamp, ec.End, tfss, ec.MaxSeries)
rss, err := netstorage.ProcessSearchQuery(qt, sq, ec.Deadline)
if err != nil {
return nil, err
return nil, &UserReadableError{
Err: err,
}
}
rssLen := rss.Len()
if rssLen == 0 {
rss.Cancel()
return nil, nil
tss := mergeTimeseries(tssCached, nil, start, ec)
return tss, nil
}
ec.QueryStats.addSeriesFetched(rssLen)
// Verify timeseries fit available memory during rollup calculations.
// Verify timeseries fit available memory after the rollup.
// Take into account points from tssCached.
pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
timeseriesLen := rssLen
if iafc != nil {
// Incremental aggregates require holding only GOMAXPROCS timeseries in memory.
@@ -1699,10 +1131,9 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
timeseriesLen = rssLen
}
}
rollupPoints := mulNoOverflow(pointsPerSeries, int64(timeseriesLen*len(rcs)))
rollupMemorySize := sumNoOverflow(mulNoOverflow(int64(rssLen), 1000), mulNoOverflow(rollupPoints, 16))
rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
rollupMemorySize = sumNoOverflow(mulNoOverflow(int64(rssLen), 1000), mulNoOverflow(rollupPoints, 16))
if maxMemory := int64(logQueryMemoryUsage.N); maxMemory > 0 && rollupMemorySize > maxMemory {
memoryIntensiveQueries.Inc()
requestURI := ec.GetRequestURI()
logger.Warnf("remoteAddr=%s, requestURI=%s: the %s requires %d bytes of memory for processing; "+
"logging this query, since it exceeds the -search.logQueryMemoryUsage=%d; "+
@@ -1711,33 +1142,44 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
}
if maxMemory := int64(maxMemoryPerQuery.N); maxMemory > 0 && rollupMemorySize > maxMemory {
rss.Cancel()
err := fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series "+
"according to -search.maxMemoryPerQuery=%d; requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
"increasing -search.maxMemoryPerQuery",
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerSeries, maxMemory, rollupMemorySize, float64(ec.Step)/1e3)
return nil, err
return nil, &UserReadableError{
Err: fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series "+
"according to -search.maxMemoryPerQuery=%d; requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
"increasing -search.maxMemoryPerQuery",
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, maxMemory, rollupMemorySize, float64(ec.Step)/1e3),
}
}
rml := getRollupMemoryLimiter()
if !rml.Get(uint64(rollupMemorySize)) {
rss.Cancel()
err := fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series; "+
"total available memory for concurrent requests: %d bytes; requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
"switching to node with more RAM; increasing -memory.allowedPercent",
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerSeries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)
return nil, err
return nil, &UserReadableError{
Err: fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series; "+
"total available memory for concurrent requests: %d bytes; "+
"requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
"switching to node with more RAM; increasing -memory.allowedPercent",
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3),
}
}
defer rml.Put(uint64(rollupMemorySize))
qt.Printf("the rollup evaluation needs an estimated %d bytes of RAM for %d series and %d points per series (summary %d points)",
rollupMemorySize, timeseriesLen, pointsPerSeries, rollupPoints)
// Evaluate rollup
keepMetricNames := getKeepMetricNames(expr)
var tss []*timeseries
if iafc != nil {
return evalRollupWithIncrementalAggregate(qt, funcName, keepMetricNames, iafc, rss, rcs, preFunc, sharedTimestamps)
tss, err = evalRollupWithIncrementalAggregate(qt, funcName, keepMetricNames, iafc, rss, rcs, preFunc, sharedTimestamps)
} else {
tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
}
return evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
if err != nil {
return nil, &UserReadableError{
Err: err,
}
}
tss = mergeTimeseries(tssCached, tss, start, ec)
rollupResultCacheV.Put(qt, ec, expr, window, tss)
return tss, nil
}
var (
@@ -1752,53 +1194,6 @@ func getRollupMemoryLimiter() *memoryLimiter {
return &rollupMemoryLimiter
}
// needSilenceIntervalForRollupFunc returns true if the rollup function with the given funcName
// may rely on the previous sample before the lookbehind window (aka prevValue),
// so the silence interval is needed for it.
//
// funcName is matched case-insensitively. Unknown functions are treated as needing the silence interval.
func needSilenceIntervalForRollupFunc(funcName string) bool {
	name := strings.ToLower(funcName)
	switch name {
	case "absent_over_time", "avg_over_time",
		"count_eq_over_time", "count_gt_over_time", "count_le_over_time", "count_ne_over_time", "count_over_time",
		"default_rollup", "first_over_time", "histogram_over_time",
		"hoeffding_bound_lower", "hoeffding_bound_upper",
		"last_over_time", "mad_over_time", "max_over_time", "median_over_time", "min_over_time",
		"predict_linear", "present_over_time",
		"quantile_over_time", "quantiles_over_time", "range_over_time",
		"share_gt_over_time", "share_le_over_time", "share_eq_over_time",
		"stale_samples_over_time", "stddev_over_time", "stdvar_over_time", "sum_over_time",
		"tfirst_over_time", "timestamp", "timestamp_with_name",
		"tlast_over_time", "tmax_over_time", "tmin_over_time",
		"zscore_over_time":
		// These rollup functions do not rely on prevValue, so they do not need the silence interval.
		return false
	}
	return true
}
func evalRollupWithIncrementalAggregate(qt *querytracer.Tracer, funcName string, keepMetricNames bool,
iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) {

View File

@@ -48,10 +48,7 @@ func (ure *UserReadableError) Error() string {
func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
if querystats.Enabled() {
startTime := time.Now()
defer func() {
querystats.RegisterQuery(q, ec.End-ec.Start, startTime)
ec.QueryStats.addExecutionTimeMsec(startTime)
}()
defer querystats.RegisterQuery(q, ec.End-ec.Start, startTime)
}
ec.validate()

View File

@@ -6910,30 +6910,6 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`outliers_iqr()`, func(t *testing.T) {
t.Parallel()
q := `sort(outliers_iqr((
alias(time(), "m1"),
alias(time()*1.5, "m2"),
alias(time()*10, "m3"),
alias(time()*1.2, "m4"),
alias(time()*0.1, "m5"),
)))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{100, 120, 140, 160, 180, 200},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("m5")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10000, 12000, 14000, 16000, 18000, 20000},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("m3")
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`outliers_mad(1)`, func(t *testing.T) {
t.Parallel()
q := `outliers_mad(1, (

View File

@@ -0,0 +1,12 @@
package promql
import "testing"
// TestIsMetricSelectorWithRollup verifies that IsMetricSelectorWithRollup returns an empty childQuery
// for a query, which isn't a bare metric selector wrapped into rollup.
//
// NOTE(review): the expected empty childQuery assumes that a binary expression
// such as `m[d] or vector(0)` must not be recognized as a metric selector with rollup - confirm.
func TestIsMetricSelectorWithRollup(t *testing.T) {
	q := "metric_name{label='value'}[365d] or vector(0)"
	childQuery, _, _ := IsMetricSelectorWithRollup(q)
	// Fail only on the unexpected result; the test previously failed unconditionally in both branches.
	if childQuery != "" {
		t.Fatalf("unexpected childQuery for query %q; got %q; want an empty string", q, childQuery)
	}
}

View File

@@ -62,7 +62,6 @@ var rollupFuncs = map[string]newRollupFunc{
"median_over_time": newRollupFuncOneArg(rollupMedian),
"min_over_time": newRollupFuncOneArg(rollupMin),
"mode_over_time": newRollupFuncOneArg(rollupModeOverTime),
"outlier_iqr_over_time": newRollupFuncOneArg(rollupOutlierIQR),
"predict_linear": newRollupPredictLinear,
"present_over_time": newRollupFuncOneArg(rollupPresent),
"quantile_over_time": newRollupQuantile,
@@ -123,7 +122,6 @@ var rollupAggrFuncs = map[string]rollupFunc{
"increases_over_time": rollupIncreases,
"integrate": rollupIntegrate,
"irate": rollupIderiv,
"iqr_over_time": rollupOutlierIQR,
"lag": rollupLag,
"last_over_time": rollupLast,
"lifetime": rollupLifetime,
@@ -227,7 +225,6 @@ var rollupFuncsKeepMetricName = map[string]bool{
"hoeffding_bound_lower": true,
"hoeffding_bound_upper": true,
"holt_winters": true,
"iqr_over_time": true,
"last_over_time": true,
"max_over_time": true,
"median_over_time": true,
@@ -955,11 +952,11 @@ func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
return rfa.prevValue
}
sf := sfs[rfa.idx]
if sf < 0 || sf > 1 {
if sf <= 0 || sf >= 1 {
return nan
}
tf := tfs[rfa.idx]
if tf < 0 || tf > 1 {
if tf <= 0 || tf >= 1 {
return nan
}
@@ -1290,29 +1287,6 @@ func newRollupQuantiles(args []interface{}) (rollupFunc, error) {
return rf, nil
}
// rollupOutlierIQR returns the last value from rfa.values if it lies outside
// the [q25-1.5*iqr ... q75+1.5*iqr] range, where q25 and q75 are the first two results
// returned by quantiles() for iqrPhis and iqr=q75-q25. Otherwise nan is returned.
//
// nan is also returned when rfa.values holds fewer than 2 values.
//
// See Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
func rollupOutlierIQR(rfa *rollupFuncArg) float64 {
	// NaNs aren't handled here, since they must be cleaned up before calling rollup funcs.
	samples := rfa.values
	if len(samples) < 2 {
		return nan
	}

	buf := getFloat64s()
	buf.A = quantiles(buf.A[:0], iqrPhis, samples)
	q25, q75 := buf.A[0], buf.A[1]
	putFloat64s(buf)

	iqr := 1.5 * (q75 - q25)
	last := samples[len(samples)-1]
	isOutlier := last > q75+iqr || last < q25-iqr
	if !isOutlier {
		return nan
	}
	return last
}
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err

View File

@@ -17,7 +17,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/metrics"
@@ -203,74 +202,11 @@ func ResetRollupResultCache() {
logger.Infof("rollupResult cache has been cleared")
}
func (rrc *rollupResultCache) GetInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter) []*timeseries {
func (rrc *rollupResultCache) Get(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
if qt.Enabled() {
query := string(expr.AppendString(nil))
query = stringsutil.LimitStringLen(query, 300)
qt = qt.NewChild("rollup cache get instant values: query=%s, window=%d, step=%d", query, window, step)
defer qt.Done()
}
// Obtain instant values from the cache
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = marshalRollupResultCacheKeyForInstantValues(bb.B[:0], expr, window, step, etfss)
tss, ok := rrc.getSeriesFromCache(qt, bb.B)
if !ok || len(tss) == 0 {
return nil
}
assertInstantValues(tss)
qt.Printf("found %d series for time=%s", len(tss), storage.TimestampToHumanReadableFormat(tss[0].Timestamps[0]))
return tss
}
// PutInstantValues stores tss in the cache under the instant-values key
// built from expr, window, step and etfss.
//
// Nothing is stored when tss is empty.
func (rrc *rollupResultCache) PutInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter, tss []*timeseries) {
	seriesCount := len(tss)
	if qt.Enabled() {
		// Limit the query length in the trace message to 300.
		q := stringsutil.LimitStringLen(string(expr.AppendString(nil)), 300)
		var timeStr string
		if seriesCount > 0 {
			timeStr = storage.TimestampToHumanReadableFormat(tss[0].Timestamps[0])
		}
		qt = qt.NewChild("rollup cache put instant values: query=%s, window=%d, step=%d, series=%d, time=%s", q, window, step, seriesCount, timeStr)
		defer qt.Done()
	}

	if seriesCount == 0 {
		qt.Printf("do not cache empty series list")
		return
	}
	assertInstantValues(tss)

	keyBuf := bbPool.Get()
	defer bbPool.Put(keyBuf)
	keyBuf.B = marshalRollupResultCacheKeyForInstantValues(keyBuf.B[:0], expr, window, step, etfss)

	// The success flag returned by putSeriesToCache is intentionally discarded here.
	_ = rrc.putSeriesToCache(qt, keyBuf.B, step, tss)
}
// DeleteInstantValues replaces the instant-values cache entry for the given
// expr, window, step and etfss with a nil series list.
//
// It panics if the nil series list cannot be stored.
func (rrc *rollupResultCache) DeleteInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter) {
	keyBuf := bbPool.Get()
	defer bbPool.Put(keyBuf)
	keyBuf.B = marshalRollupResultCacheKeyForInstantValues(keyBuf.B[:0], expr, window, step, etfss)

	stored := rrc.putSeriesToCache(qt, keyBuf.B, step, nil)
	if !stored {
		logger.Panicf("BUG: cannot store zero series to cache")
	}

	if !qt.Enabled() {
		return
	}
	// Limit the query length in the trace message to 300.
	q := stringsutil.LimitStringLen(string(expr.AppendString(nil)), 300)
	qt.Printf("rollup result cache delete instant values: query=%s, window=%d, step=%d", q, window, step)
}
func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
if qt.Enabled() {
query := string(expr.AppendString(nil))
query = stringsutil.LimitStringLen(query, 300)
qt = qt.NewChild("rollup cache get series: query=%s, timeRange=%s, window=%d, step=%d", query, ec.timeRangeString(), window, ec.Step)
query = bytesutil.LimitStringLen(query, 300)
qt = qt.NewChild("rollup cache get: query=%s, timeRange=%s, step=%d, window=%d", query, ec.timeRangeString(), ec.Step, window)
defer qt.Done()
}
if !ec.mayCache() {
@@ -282,7 +218,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
metainfoBuf := rrc.c.Get(nil, bb.B)
if len(metainfoBuf) == 0 {
qt.Printf("nothing found")
@@ -297,17 +233,31 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
qt.Printf("nothing found on the timeRange")
return nil, ec.Start
}
var ok bool
bb.B = key.Marshal(bb.B[:0])
tss, ok = rrc.getSeriesFromCache(qt, bb.B)
if !ok {
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
if len(compressedResultBuf.B) == 0 {
mi.RemoveKey(key)
metainfoBuf = mi.Marshal(metainfoBuf[:0])
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
rrc.c.Set(bb.B, metainfoBuf)
qt.Printf("missing cache entry")
return nil, ec.Start
}
// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
// refers to the byte slice, so it cannot be returned to the resultBufPool.
qt.Printf("load compressed entry from cache with size %d bytes", len(compressedResultBuf.B))
resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
if err != nil {
logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
}
qt.Printf("unpack the entry into %d bytes", len(resultBuf))
tss, err = unmarshalTimeseriesFast(resultBuf)
if err != nil {
logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
}
qt.Printf("unmarshal %d series", len(tss))
// Extract values for the matching timestamps
timestamps := tss[0].Timestamps
@@ -316,10 +266,12 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
i++
}
if i == len(timestamps) {
// no matches.
qt.Printf("no datapoints found in the cached series on the given timeRange")
return nil, ec.Start
}
if timestamps[i] != ec.Start {
// The cached range doesn't cover the requested range.
qt.Printf("cached series don't cover the given timeRange")
return nil, ec.Start
}
@@ -330,7 +282,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
}
j++
if j <= i {
qt.Printf("no matching samples for the given timeRange")
// no matches.
return nil, ec.Start
}
@@ -351,21 +303,17 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
var resultBufPool bytesutil.ByteBufferPool
func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
func (rrc *rollupResultCache) Put(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
if qt.Enabled() {
query := string(expr.AppendString(nil))
query = stringsutil.LimitStringLen(query, 300)
qt = qt.NewChild("rollup cache put series: query=%s, timeRange=%s, step=%d, window=%d, series=%d", query, ec.timeRangeString(), ec.Step, window, len(tss))
query = bytesutil.LimitStringLen(query, 300)
qt = qt.NewChild("rollup cache put: query=%s, timeRange=%s, step=%d, window=%d, series=%d", query, ec.timeRangeString(), ec.Step, window, len(tss))
defer qt.Done()
}
if !ec.mayCache() {
if len(tss) == 0 || !ec.mayCache() {
qt.Printf("do not store series to cache, since it is disabled in the current context")
return
}
if len(tss) == 0 {
qt.Printf("do not store empty series list")
return
}
// Remove values up to currentTime - step - cacheTimestampOffset,
// since these values may be added later.
@@ -398,7 +346,7 @@ func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig,
metainfoBuf := bbPool.Get()
defer bbPool.Put(metainfoBuf)
metainfoKey.B = marshalRollupResultCacheKeyForSeries(metainfoKey.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
metainfoKey.B = marshalRollupResultCacheKey(metainfoKey.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
metainfoBuf.B = rrc.c.Get(metainfoBuf.B[:0], metainfoKey.B)
var mi rollupResultCacheMetainfo
if len(metainfoBuf.B) > 0 {
@@ -417,17 +365,31 @@ func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig,
return
}
maxMarshaledSize := getRollupResultCacheSize() / 4
resultBuf := resultBufPool.Get()
defer resultBufPool.Put(resultBuf)
resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
if len(resultBuf.B) == 0 {
tooBigRollupResults.Inc()
qt.Printf("cannot store series in the cache, since they would occupy more than %d bytes", maxMarshaledSize)
return
}
if qt.Enabled() {
startString := storage.TimestampToHumanReadableFormat(start)
endString := storage.TimestampToHumanReadableFormat(end)
qt.Printf("marshal %d series on a timeRange=[%s..%s] into %d bytes", len(tss), startString, endString, len(resultBuf.B))
}
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)
qt.Printf("compress %d bytes into %d bytes", len(resultBuf.B), len(compressedResultBuf.B))
var key rollupResultCacheKey
key.prefix = rollupResultCacheKeyPrefix
key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
bb := bbPool.Get()
bb.B = key.Marshal(bb.B[:0])
ok := rrc.putSeriesToCache(qt, bb.B, ec.Step, tss)
bbPool.Put(bb)
if !ok {
return
}
rollupResultKey := key.Marshal(nil)
rrc.c.SetBig(rollupResultKey, compressedResultBuf.B)
qt.Printf("store %d bytes in the cache", len(compressedResultBuf.B))
mi.AddKey(key, timestamps[0], timestamps[len(timestamps)-1])
metainfoBuf.B = mi.Marshal(metainfoBuf.B[:0])
@@ -439,52 +401,6 @@ var (
rollupResultCacheKeySuffix = uint64(time.Now().UnixNano())
)
// getSeriesFromCache loads the compressed entry stored under key, decompresses it
// and unmarshals it into series.
//
// The second returned value is false when nothing is stored under key.
// It panics if the stored entry cannot be decompressed or unmarshaled.
func (rrc *rollupResultCache) getSeriesFromCache(qt *querytracer.Tracer, key []byte) ([]*timeseries, bool) {
	compressed := resultBufPool.Get()
	compressed.B = rrc.c.GetBig(compressed.B[:0], key)
	compressedSize := len(compressed.B)
	if compressedSize == 0 {
		qt.Printf("nothing found in the cache")
		resultBufPool.Put(compressed)
		return nil, false
	}
	qt.Printf("load compressed entry from cache with size %d bytes", compressedSize)

	// The decompressed data goes into a freshly allocated slice: the series returned
	// by unmarshalTimeseriesFast keep referring to it, so it must not be re-used.
	unpacked, err := encoding.DecompressZSTD(nil, compressed.B)
	if err != nil {
		logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
	}
	// The compressed buffer is no longer needed once the data is unpacked.
	resultBufPool.Put(compressed)
	qt.Printf("unpack the entry into %d bytes", len(unpacked))

	series, err := unmarshalTimeseriesFast(unpacked)
	if err != nil {
		logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
	}
	qt.Printf("unmarshal %d series", len(series))
	return series, true
}
// putSeriesToCache marshals tss, compresses the marshaled data with zstd level 1
// and stores the result in the cache under key.
//
// It returns false without storing anything when marshalTimeseriesFast returns
// an empty result for the size limit of getRollupResultCacheSize()/4;
// tooBigRollupResults is incremented in this case.
func (rrc *rollupResultCache) putSeriesToCache(qt *querytracer.Tracer, key []byte, step int64, tss []*timeseries) bool {
	sizeLimit := getRollupResultCacheSize() / 4

	marshaled := resultBufPool.Get()
	defer resultBufPool.Put(marshaled)
	marshaled.B = marshalTimeseriesFast(marshaled.B[:0], tss, sizeLimit, step)
	if len(marshaled.B) == 0 {
		tooBigRollupResults.Inc()
		qt.Printf("cannot store %d series in the cache, since they would occupy more than %d bytes", len(tss), sizeLimit)
		return false
	}
	qt.Printf("marshal %d series into %d bytes", len(tss), len(marshaled.B))

	compressed := resultBufPool.Get()
	defer resultBufPool.Put(compressed)
	compressed.B = encoding.CompressZSTDLevel(compressed.B[:0], marshaled.B, 1)
	qt.Printf("compress %d bytes into %d bytes", len(marshaled.B), len(compressed.B))

	rrc.c.SetBig(key, compressed.B)
	qt.Printf("store %d bytes in the cache", len(compressed.B))
	return true
}
func newRollupResultCacheKeyPrefix() uint64 {
var buf [8]byte
if _, err := rand.Read(buf[:]); err != nil {
@@ -523,36 +439,14 @@ func mustSaveRollupResultCacheKeyPrefix(path string) {
var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
// Increment this value every time the format of the cache changes.
const rollupResultCacheVersion = 10
const rollupResultCacheVersion = 9
const (
rollupResultCacheTypeSeries = 0
rollupResultCacheTypeInstantValues = 1
)
func marshalRollupResultCacheKeyForSeries(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
dst = append(dst, rollupResultCacheVersion)
dst = encoding.MarshalUint64(dst, rollupResultCacheKeyPrefix)
dst = append(dst, rollupResultCacheTypeSeries)
dst = encoding.MarshalInt64(dst, window)
dst = encoding.MarshalInt64(dst, step)
dst = marshalTagFiltersForRollupResultCacheKey(dst, etfs)
dst = expr.AppendString(dst)
return dst
}
// marshalRollupResultCacheKeyForInstantValues appends the cache key for instant values to dst
// and returns the extended slice.
//
// The key consists of the cache version, the key prefix, the instant-values type marker,
// window, step, the marshaled etfs and the string representation of expr, in this order.
func marshalRollupResultCacheKeyForInstantValues(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
	// Header: version, key prefix and entry type.
	key := append(dst, rollupResultCacheVersion)
	key = encoding.MarshalUint64(key, rollupResultCacheKeyPrefix)
	key = append(key, rollupResultCacheTypeInstantValues)

	// Query parameters.
	key = encoding.MarshalInt64(key, window)
	key = encoding.MarshalInt64(key, step)
	key = marshalTagFiltersForRollupResultCacheKey(key, etfs)

	// The expression itself goes last.
	return expr.AppendString(key)
}
func marshalTagFiltersForRollupResultCacheKey(dst []byte, etfs [][]storage.TagFilter) []byte {
for i, etf := range etfs {
for _, f := range etf {
dst = f.Marshal(dst)
@@ -567,15 +461,12 @@ func marshalTagFiltersForRollupResultCacheKey(dst []byte, etfs [][]storage.TagFi
// mergeTimeseries concatenates b with a and returns the result.
//
// Preconditions:
// - a mustn't intersect with b by timestamps.
// - a mustn't intersect with b.
// - a timestamps must be smaller than b timestamps.
//
// Postconditions:
// - a and b cannot be used after returning from the call.
func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, ec *EvalConfig) ([]*timeseries, error) {
qt = qt.NewChild("merge series len(a)=%d, len(b)=%d", len(a), len(b))
defer qt.Done()
func mergeTimeseries(a, b []*timeseries, bStart int64, ec *EvalConfig) []*timeseries {
sharedTimestamps := ec.getSharedTimestamps()
if bStart == ec.Start {
// Nothing to merge - b covers all the time range.
@@ -587,7 +478,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
logger.Panicf("BUG: unexpected number of values in b; got %d; want %d", len(tsB.Values), len(tsB.Timestamps))
}
}
return b, nil
return b
}
m := make(map[string]*timeseries, len(a))
@@ -595,9 +486,6 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
defer bbPool.Put(bb)
for _, ts := range a {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if _, ok := m[string(bb.B)]; ok {
return nil, fmt.Errorf("duplicate series found: %s", &ts.MetricName)
}
m[string(bb.B)] = ts
}
@@ -624,8 +512,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
}
tmp.Values = append(tmp.Values, tsB.Values...)
if len(tmp.Values) != len(tmp.Timestamps) {
logger.Panicf("BUG: unexpected values after merging new values; got %d; want %d; len(a.Values)=%d; len(b.Values)=%d",
len(tmp.Values), len(tmp.Timestamps), len(tsA.Values), len(tsB.Values))
logger.Panicf("BUG: unexpected values after merging new values; got %d; want %d", len(tmp.Values), len(tmp.Timestamps))
}
rvs = append(rvs, &tmp)
}
@@ -649,8 +536,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
}
rvs = append(rvs, &tmp)
}
qt.Printf("resulting series=%d", len(rvs))
return rvs, nil
return rvs
}
type rollupResultCacheMetainfo struct {

View File

@@ -61,7 +61,7 @@ func TestRollupResultCache(t *testing.T) {
// Try obtaining an empty value.
t.Run("empty", func(t *testing.T) {
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != ec.Start {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
}
@@ -79,8 +79,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
@@ -100,8 +100,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, ae, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, ae, window)
rollupResultCacheV.Put(nil, ec, ae, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, ae, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
@@ -123,8 +123,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{333, 0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -142,8 +142,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -161,8 +161,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -180,8 +180,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -199,8 +199,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2, 3, 4, 5, 6, 7},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -222,8 +222,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{1, 2, 3, 4, 5, 6},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -247,8 +247,8 @@ func TestRollupResultCache(t *testing.T) {
}
tss = append(tss, ts)
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
tssResult, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss)
tssResult, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -276,10 +276,10 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss1)
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss2)
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss3)
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
rollupResultCacheV.Put(nil, ec, fe, window, tss1)
rollupResultCacheV.Put(nil, ec, fe, window, tss2)
rollupResultCacheV.Put(nil, ec, fe, window, tss3)
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
@@ -311,10 +311,7 @@ func TestMergeTimeseries(t *testing.T) {
Values: []float64{1, 2, 3, 4, 5, 6},
},
}
tss, err := mergeTimeseries(nil, a, b, 1000, ec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
tss := mergeTimeseries(a, b, 1000, ec)
tssExpected := []*timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
@@ -331,10 +328,7 @@ func TestMergeTimeseries(t *testing.T) {
Values: []float64{3, 4, 5, 6},
},
}
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
@@ -351,10 +345,7 @@ func TestMergeTimeseries(t *testing.T) {
},
}
b := []*timeseries{}
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
@@ -376,10 +367,7 @@ func TestMergeTimeseries(t *testing.T) {
Values: []float64{3, 4, 5, 6},
},
}
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
@@ -403,10 +391,7 @@ func TestMergeTimeseries(t *testing.T) {
},
}
b[0].MetricName.MetricGroup = []byte("foo")
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
{
MetricName: storage.MetricName{

View File

@@ -12,35 +12,6 @@ var (
testTimestamps = []int64{5, 15, 24, 36, 49, 60, 78, 80, 97, 115, 120, 130}
)
func TestRollupOutlierIQR(t *testing.T) {
f := func(values []float64, resultExpected float64) {
t.Helper()
rfa := &rollupFuncArg{
values: values,
timestamps: nil,
}
result := rollupOutlierIQR(rfa)
if math.IsNaN(result) {
if !math.IsNaN(resultExpected) {
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
}
} else {
if math.IsNaN(resultExpected) {
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
}
if result != resultExpected {
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
}
}
}
f([]float64{1, 2, 3, 4, 5}, nan)
f([]float64{1, 2, 3, 4, 7}, nan)
f([]float64{1, 2, 3, 4, 8}, 8)
f([]float64{1, 2, 3, 4, -2}, nan)
f([]float64{1, 2, 3, 4, -3}, -3)
}
func TestRollupIderivDuplicateTimestamps(t *testing.T) {
rfa := &rollupFuncArg{
values: []float64{1, 2, 3, 4, 5},
@@ -189,7 +160,7 @@ func TestDerivValues(t *testing.T) {
testRowsEqual(t, values, timestamps, valuesExpected, timestamps)
}
func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected float64) {
func testRollupFuncWithValues(t *testing.T, funcName string, args []interface{}, vInput []float64, vTimestamps []int64, vExpected float64) {
t.Helper()
nrf := getRollupFunc(funcName)
if nrf == nil {
@@ -201,9 +172,11 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
}
var rfa rollupFuncArg
rfa.prevValue = nan
rfa.realPrevValue = nan
rfa.realNextValue = nan
rfa.prevTimestamp = 0
rfa.values = append(rfa.values, testValues...)
rfa.timestamps = append(rfa.timestamps, testTimestamps...)
rfa.values = append(rfa.values, vInput...)
rfa.timestamps = append(rfa.timestamps, vTimestamps...)
rfa.window = rfa.timestamps[len(rfa.timestamps)-1] - rfa.timestamps[0]
if rollupFuncsRemoveCounterResets[funcName] {
removeCounterResets(rfa.values)
@@ -215,9 +188,6 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
}
} else {
if math.IsNaN(v) {
t.Fatalf("unexpected value; got %v want %v", v, vExpected)
}
eps := math.Abs(v - vExpected)
if eps > 1e-14 {
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
@@ -226,6 +196,10 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
}
}
// testRollupFunc runs testRollupFuncWithValues for funcName and args
// on the shared testValues and testTimestamps inputs and expects vExpected as the result.
func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected float64) {
testRollupFuncWithValues(t, funcName, args, testValues, testTimestamps, vExpected)
}
func TestRollupDurationOverTime(t *testing.T) {
f := func(maxInterval, dExpected float64) {
t.Helper()
@@ -470,12 +444,12 @@ func TestRollupHoltWinters(t *testing.T) {
}
f(-1, 0.5, nan)
f(0, 0.5, -856)
f(1, 0.5, 34)
f(0, 0.5, nan)
f(1, 0.5, nan)
f(2, 0.5, nan)
f(0.5, -1, nan)
f(0.5, 0, -54.1474609375)
f(0.5, 1, 25.25)
f(0.5, 0, nan)
f(0.5, 1, nan)
f(0.5, 2, nan)
f(0.5, 0.5, 34.97794532775879)
f(0.1, 0.5, -131.30529492371622)
@@ -546,7 +520,6 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
f("increase", 398)
f("increase_prometheus", 275)
f("irate", 0)
f("outlier_iqr_over_time", nan)
f("rate", 2200)
f("resets", 5)
f("range_over_time", 111)
@@ -1507,3 +1480,121 @@ func TestRollupDelta(t *testing.T) {
f(1, nan, nan, nil, 0)
f(100, nan, nan, nil, 0)
}
// TestIncrease runs the `increase` rollup function over a table of input values
// and compares the result with the expected value for every case.
func TestIncrease(t *testing.T) {
	const funcName = "increase"

	// Cases are executed in the listed order; the first mismatch stops the test.
	cases := []struct {
		values []float64
		want   float64
	}{
		{[]float64{100, 100}, 0},
		{[]float64{100, 90}, 0},
		{[]float64{100, 88}, 0},
		{[]float64{100, 87}, 187},
		{[]float64{100, 187}, 187},
		{[]float64{100, 87, 200, 287}, 387},
		{[]float64{100, 187, 200, 287}, 287},
		{[]float64{100, 87, 200, 87}, 387},
		{[]float64{100, 87, 200, 187}, 300},
		{[]float64{100, 87, 200, 177}, 300},
		{[]float64{100, 13}, 113},
		{[]float64{100, 9}, 9},
		{[]float64{100, 1}, 1},
		{[]float64{100, 0}, 0},
		{[]float64{100, -1}, -1},
		{[]float64{100, -10}, 90},
		{[]float64{100, -90}, 10},
	}

	for _, tc := range cases {
		// A fresh metric expression and args list are built for every case.
		var me metricsql.MetricExpr
		args := []interface{}{&metricsql.RollupExpr{Expr: &me}}
		testRollupFuncWithValues(t, funcName, args, tc.values, []int64{1, 2}, tc.want)
	}
}

View File

@@ -79,10 +79,7 @@ var timeseriesPool sync.Pool
func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
if len(tss) == 0 {
// marshal zero timeseries and zero timestamps
dst = encoding.MarshalUint64(dst, 0)
dst = encoding.MarshalUint64(dst, 0)
return dst
logger.Panicf("BUG: tss cannot be empty")
}
// timestamps are stored only once for all the tss, since they must be identical

View File

@@ -1,4 +1,5 @@
//go:build go1.15
// +build go1.15
package promql

View File

@@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.349e6522.css",
"main.js": "./static/js/main.c93073e5.js",
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
"main.css": "./static/css/main.d9ac05de.css",
"main.js": "./static/js/main.70434a4f.js",
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.349e6522.css",
"static/js/main.c93073e5.js"
"static/css/main.d9ac05de.css",
"static/js/main.70434a4f.js"
]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.c93073e5.js"></script><link href="./static/css/main.349e6522.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.70434a4f.js"></script><link href="./static/css/main.d9ac05de.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -7,7 +7,7 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @remix-run/router v1.10.0
* @remix-run/router v1.7.2
*
* Copyright (c) Remix Software Inc.
*
@@ -18,7 +18,7 @@
*/
/**
* React Router DOM v6.17.0
* React Router DOM v6.14.2
*
* Copyright (c) Remix Software Inc.
*
@@ -29,7 +29,7 @@
*/
/**
* React Router v6.17.0
* React Router v6.14.2
*
* Copyright (c) Remix Software Inc.
*

File diff suppressed because one or more lines are too long

View File

@@ -1,11 +1,11 @@
---
sort: 23
weight: 23
sort: 14
weight: 14
title: MetricsQL
menu:
docs:
parent: 'victoriametrics'
weight: 23
parent: "victoriametrics"
weight: 14
aliases:
- /ExtendedPromQL.html
- /MetricsQL.html
@@ -21,8 +21,7 @@ However, there are some [intentional differences](https://medium.com/@romanhavro
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
If you are unfamiliar with PromQL, then it is suggested to read [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
and the introduction to [basic querying via MetricsQL](https://docs.victoriametrics.com/keyConcepts.html#metricsql).
If you are unfamiliar with PromQL, then it is suggested to read [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
The following functionality is implemented differently in MetricsQL compared to PromQL. This improves user experience:
@@ -110,7 +109,7 @@ The list of MetricsQL features on top of PromQL:
* [histogram_quantile](#histogram_quantile) accepts optional third arg - `boundsLabel`.
In this case it returns `lower` and `upper` bounds for the estimated percentile.
See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`. See also [drop_empty_series](#drop_empty_series).
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`.
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
* `WITH` templates. This feature simplifies writing and managing complex queries.
@@ -532,7 +531,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).
See also [mad](#mad) and [range_mad](#range_mad).
#### max_over_time
@@ -562,18 +561,6 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.
#### outlier_iqr_over_time
`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
if its value is either smaller than the `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.
The `outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last hour.
See also [outliers_iqr](#outliers_iqr).
#### predict_linear
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
@@ -878,7 +865,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
### Transform functions
@@ -1068,17 +1055,6 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
This function is supported by PromQL. See also [rad](#rad).
#### drop_empty_series
`drop_empty_series(q)` is a [transform function](#transform-functions), which drops empty series from `q`.
This function can be used when `default` operator should be applied only to non-empty series. For example,
`drop_empty_series(temperature < 30) default 42` returns series, which have at least a single sample smaller than 30 on the selected time range,
while filling gaps in the returned series with 42.
On the other hand `(temperature < 30) default 40` returns all the `temperature` series, even if they have no samples smaller than 30,
by replacing all the values bigger or equal to 30 with 40.
#### end
`end()` is a [transform function](#transform-functions), which returns the unix timestamp in seconds for the last point.
@@ -1615,7 +1591,7 @@ which maps `label` values from `src_*` to `dst*` for all the time series returne
which drops time series from `q` with `label` not matching the given `regexp`.
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
See also [label_mismatch](#label_mismatch).
#### label_mismatch
@@ -1623,7 +1599,7 @@ See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
which drops time series from `q` with `label` matching the given `regexp`.
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
See also [label_match](#label_match) and [labels_equal](#labels_equal).
See also [label_match](#label_match).
#### label_move
@@ -1666,30 +1642,23 @@ for the given `label` for every time series returned by `q`.
For example, if `label_value(foo, "bar")` is applied to `foo{bar="1.234"}`, then it will return a time series
`foo{bar="1.234"}` with `1.234` value. Function will return no data for non-numeric label values.
#### labels_equal
`labels_equal(q, "label1", "label2", ...)` is [label manipulation function](#label-manipulation-functions), which returns `q` series with identical values for the listed labels
"label1", "label2", etc.
See also [label_match](#label_match) and [label_mismatch](#label_mismatch).
#### sort_by_label
`sort_by_label(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
`sort_by_label(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
See also [sort_by_label_desc](#sort_by_label_desc) and [sort_by_label_numeric](#sort_by_label_numeric).
#### sort_by_label_desc
`sort_by_label_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
`sort_by_label_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
For example, `sort_by_label_desc(foo, "bar")` would sort `foo` series in descending order by values of the label `bar` in these series.
See also [sort_by_label](#sort_by_label) and [sort_by_label_numeric_desc](#sort_by_label_numeric_desc).
#### sort_by_label_numeric
`sort_by_label_numeric(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
`sort_by_label_numeric(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series
in the following order of `bar` label values: `1`, `2`, `15` and `101`.
@@ -1698,7 +1667,7 @@ See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_
#### sort_by_label_numeric_desc
`sort_by_label_numeric_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
`sort_by_label_numeric_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric_desc(foo, "bar")`
would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
@@ -1870,33 +1839,20 @@ This function is supported by PromQL.
`mode(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
#### outliers_iqr
`outliers_iqr(q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
outside the [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
compared to other time series at the given point, where:
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.
The `outliers_iqr()` is useful for detecting anomalous series in the group of series. For example, `outliers_iqr(temperature) by (country)` returns
per-country series with anomalous outlier values compared to the rest of per-country series.
See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
#### outliers_mad
`outliers_mad(tolerance, q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
E.g. it returns time series with at least a single point below `median(q) - tolerance * mad(q)` or a single point above `median(q) + tolerance * mad(q)`.
See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).
See also [outliersk](#outliersk) and [mad](#mad).
#### outliersk
`outliersk(k, q)` is [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
out of time series returned by `q`.
See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
See also [outliers_mad](#outliers_mad).
#### quantile
@@ -2016,7 +1972,7 @@ See also [bottomk_min](#bottomk_min).
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
This function is useful for detecting anomalies in the group of related time series.
See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
## Subqueries

2
app/vmui/.gitignore vendored
View File

@@ -105,3 +105,5 @@ dist
# WebStorm etc
.idea/
MetricsQL.md

View File

@@ -1,4 +1,4 @@
FROM golang:1.21.4 as build-web-stage
FROM golang:1.21.3 as build-web-stage
COPY build /build
WORKDIR /build

Some files were not shown because too many files have changed in this diff Show More