mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-24 20:26:32 +03:00
Compare commits
3 Commits
v1.95.0
...
streaming-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5bc3488538 | ||
|
|
1cd6232537 | ||
|
|
ed1bef0e2d |
2
.github/workflows/check-licenses.yml
vendored
2
.github/workflows/check-licenses.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
go-version: 1.21.3
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
2
.github/workflows/codeql-analysis.yml
vendored
2
.github/workflows/codeql-analysis.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
|
||||
6
.github/workflows/main.yml
vendored
6
.github/workflows/main.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
id: go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.21.4
|
||||
go-version: 1.21.3
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
|
||||
3
.github/workflows/sync-docs.yml
vendored
3
.github/workflows/sync-docs.yml
vendored
@@ -8,7 +8,6 @@ on:
|
||||
workflow_dispatch: {}
|
||||
env:
|
||||
PAGEFIND_VERSION: "1.0.3"
|
||||
HUGO_VERSION: "latest"
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout and to commit back image update
|
||||
deployments: write
|
||||
@@ -29,7 +28,7 @@ jobs:
|
||||
path: docs
|
||||
- uses: peaceiris/actions-hugo@v2
|
||||
with:
|
||||
hugo-version: ${{env.HUGO_VERSION}}
|
||||
hugo-version: 'latest'
|
||||
extended: true
|
||||
- name: Install PageFind #install the static search engine for index build
|
||||
uses: supplypike/setup-bin@v3
|
||||
|
||||
88
Makefile
88
Makefile
@@ -25,143 +25,143 @@ all: \
|
||||
victoria-logs-prod \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmalert-tool-prod \
|
||||
vmauth-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod \
|
||||
vmctl-prod
|
||||
vmctl-prod \
|
||||
vmalert-tool-prod
|
||||
|
||||
clean:
|
||||
rm -rf bin/*
|
||||
|
||||
publish: \
|
||||
publish: package-base \
|
||||
publish-victoria-metrics \
|
||||
publish-vmagent \
|
||||
publish-vmalert \
|
||||
publish-vmalert-tool \
|
||||
publish-vmauth \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore \
|
||||
publish-vmctl
|
||||
publish-vmctl \
|
||||
publish-vmalert-tool
|
||||
|
||||
package: \
|
||||
package-victoria-metrics \
|
||||
package-victoria-logs \
|
||||
package-vmagent \
|
||||
package-vmalert \
|
||||
package-vmalert-tool \
|
||||
package-vmauth \
|
||||
package-vmbackup \
|
||||
package-vmrestore \
|
||||
package-vmctl
|
||||
package-vmctl \
|
||||
package-vmalert-tool
|
||||
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmalert-tool \
|
||||
vmauth \
|
||||
vmbackup \
|
||||
vmrestore \
|
||||
vmctl
|
||||
vmctl \
|
||||
vmalert-tool
|
||||
|
||||
vmutils-pure: \
|
||||
vmagent-pure \
|
||||
vmalert-pure \
|
||||
vmalert-tool-pure \
|
||||
vmauth-pure \
|
||||
vmbackup-pure \
|
||||
vmrestore-pure \
|
||||
vmctl-pure
|
||||
vmctl-pure \
|
||||
vmalert-tool-pure
|
||||
|
||||
vmutils-linux-amd64: \
|
||||
vmagent-linux-amd64 \
|
||||
vmalert-linux-amd64 \
|
||||
vmalert-tool-linux-amd64 \
|
||||
vmauth-linux-amd64 \
|
||||
vmbackup-linux-amd64 \
|
||||
vmrestore-linux-amd64 \
|
||||
vmctl-linux-amd64
|
||||
vmctl-linux-amd64 \
|
||||
vmalert-tool-linux-amd64
|
||||
|
||||
vmutils-linux-arm64: \
|
||||
vmagent-linux-arm64 \
|
||||
vmalert-linux-arm64 \
|
||||
vmalert-tool-linux-arm64 \
|
||||
vmauth-linux-arm64 \
|
||||
vmbackup-linux-arm64 \
|
||||
vmrestore-linux-arm64 \
|
||||
vmctl-linux-arm64
|
||||
vmctl-linux-arm64 \
|
||||
vmalert-tool-linux-arm64
|
||||
|
||||
vmutils-linux-arm: \
|
||||
vmagent-linux-arm \
|
||||
vmalert-linux-arm \
|
||||
vmalert-tool-linux-arm \
|
||||
vmauth-linux-arm \
|
||||
vmbackup-linux-arm \
|
||||
vmrestore-linux-arm \
|
||||
vmctl-linux-arm
|
||||
vmctl-linux-arm \
|
||||
vmalert-tool-linux-arm
|
||||
|
||||
vmutils-linux-386: \
|
||||
vmagent-linux-386 \
|
||||
vmalert-linux-386 \
|
||||
vmalert-tool-linux-386 \
|
||||
vmauth-linux-386 \
|
||||
vmbackup-linux-386 \
|
||||
vmrestore-linux-386 \
|
||||
vmctl-linux-386
|
||||
vmctl-linux-386 \
|
||||
vmalert-tool-linux-386
|
||||
|
||||
vmutils-linux-ppc64le: \
|
||||
vmagent-linux-ppc64le \
|
||||
vmalert-linux-ppc64le \
|
||||
vmalert-tool-linux-ppc64le \
|
||||
vmauth-linux-ppc64le \
|
||||
vmbackup-linux-ppc64le \
|
||||
vmrestore-linux-ppc64le \
|
||||
vmctl-linux-ppc64le
|
||||
vmctl-linux-ppc64le \
|
||||
vmalert-tool-linux-ppc64le
|
||||
|
||||
vmutils-darwin-amd64: \
|
||||
vmagent-darwin-amd64 \
|
||||
vmalert-darwin-amd64 \
|
||||
vmalert-tool-darwin-amd64 \
|
||||
vmauth-darwin-amd64 \
|
||||
vmbackup-darwin-amd64 \
|
||||
vmrestore-darwin-amd64 \
|
||||
vmctl-darwin-amd64
|
||||
vmctl-darwin-amd64 \
|
||||
vmalert-tool-darwin-amd64
|
||||
|
||||
vmutils-darwin-arm64: \
|
||||
vmagent-darwin-arm64 \
|
||||
vmalert-darwin-arm64 \
|
||||
vmalert-tool-darwin-arm64 \
|
||||
vmauth-darwin-arm64 \
|
||||
vmbackup-darwin-arm64 \
|
||||
vmrestore-darwin-arm64 \
|
||||
vmctl-darwin-arm64
|
||||
vmctl-darwin-arm64 \
|
||||
vmalert-tool-darwin-arm64
|
||||
|
||||
vmutils-freebsd-amd64: \
|
||||
vmagent-freebsd-amd64 \
|
||||
vmalert-freebsd-amd64 \
|
||||
vmalert-tool-freebsd-amd64 \
|
||||
vmauth-freebsd-amd64 \
|
||||
vmbackup-freebsd-amd64 \
|
||||
vmrestore-freebsd-amd64 \
|
||||
vmctl-freebsd-amd64
|
||||
vmctl-freebsd-amd64 \
|
||||
vmalert-tool-freebsd-amd64
|
||||
|
||||
vmutils-openbsd-amd64: \
|
||||
vmagent-openbsd-amd64 \
|
||||
vmalert-openbsd-amd64 \
|
||||
vmalert-tool-openbsd-amd64 \
|
||||
vmauth-openbsd-amd64 \
|
||||
vmbackup-openbsd-amd64 \
|
||||
vmrestore-openbsd-amd64 \
|
||||
vmctl-openbsd-amd64
|
||||
vmctl-openbsd-amd64 \
|
||||
vmalert-tool-openbsd-amd64
|
||||
|
||||
vmutils-windows-amd64: \
|
||||
vmagent-windows-amd64 \
|
||||
vmalert-windows-amd64 \
|
||||
vmalert-tool-windows-amd64 \
|
||||
vmauth-windows-amd64 \
|
||||
vmbackup-windows-amd64 \
|
||||
vmrestore-windows-amd64 \
|
||||
vmctl-windows-amd64
|
||||
vmctl-windows-amd64 \
|
||||
vmalert-tool-windows-amd64
|
||||
|
||||
victoria-metrics-crossbuild: \
|
||||
victoria-metrics-linux-386 \
|
||||
@@ -354,72 +354,72 @@ release-vmutils-windows-amd64:
|
||||
release-vmutils-goos-goarch: \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||
|
||||
release-vmutils-windows-goarch: \
|
||||
vmagent-windows-$(GOARCH)-prod \
|
||||
vmalert-windows-$(GOARCH)-prod \
|
||||
vmalert-tool-windows-$(GOARCH)-prod \
|
||||
vmauth-windows-$(GOARCH)-prod \
|
||||
vmbackup-windows-$(GOARCH)-prod \
|
||||
vmrestore-windows-$(GOARCH)-prod \
|
||||
vmctl-windows-$(GOARCH)-prod
|
||||
vmctl-windows-$(GOARCH)-prod \
|
||||
vmalert-tool-windows-$(GOARCH)-prod
|
||||
cd bin && \
|
||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||
cd bin && rm -rf \
|
||||
vmagent-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||
vmauth-windows-$(GOARCH)-prod.exe \
|
||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||
vmctl-windows-$(GOARCH)-prod.exe
|
||||
vmctl-windows-$(GOARCH)-prod.exe \
|
||||
vmalert-tool-windows-$(GOARCH)-prod.exe
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
@@ -486,7 +486,7 @@ golangci-lint: install-golangci-lint
|
||||
golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.55.1
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.54.2
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
|
||||
133
README.md
133
README.md
@@ -338,8 +338,7 @@ which can be used as faster and less resource-hungry alternative to Prometheus.
|
||||
|
||||
## Grafana setup
|
||||
|
||||
Create [Prometheus datasource](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/)
|
||||
in Grafana with the following url:
|
||||
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following url:
|
||||
|
||||
```url
|
||||
http://<victoriametrics-addr>:8428
|
||||
@@ -353,9 +352,6 @@ or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
|
||||
Alternatively, use VictoriaMetrics [datasource plugin](https://github.com/VictoriaMetrics/grafana-datasource) with support of extra features.
|
||||
See more in [description](https://github.com/VictoriaMetrics/grafana-datasource#victoriametrics-data-source-for-grafana).
|
||||
|
||||
Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
|
||||
If you don't see an option to create a data source - try contacting system administrator.
|
||||
|
||||
## How to upgrade VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking [the CHANGELOG page](https://docs.victoriametrics.com/CHANGELOG.html) and performing regular upgrades.
|
||||
@@ -446,7 +442,7 @@ This information is obtained from the `/api/v1/status/active_queries` HTTP endpo
|
||||
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
|
||||
|
||||
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
|
||||
1. Click the `Explore Prometheus metrics` tab.
|
||||
1. Click the `Explore metrics` tab.
|
||||
1. Select the `job` you want to explore.
|
||||
1. Optionally select the `instance` for the selected job to explore.
|
||||
1. Select metrics you want to explore and compare.
|
||||
@@ -1130,18 +1126,6 @@ For example, the following command builds the image on top of [scratch](https://
|
||||
ROOT_IMAGE=scratch make package-victoria-metrics
|
||||
```
|
||||
|
||||
#### Building VictoriaMetrics with Podman
|
||||
|
||||
VictoriaMetrics can be built with Podman in either rootful or rootless mode.
|
||||
|
||||
When building via rootlful Podman, simply add `DOCKER=podman` to the relevant `make` commandline. To build
|
||||
via rootless Podman, add `DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"` to the `make`
|
||||
commandline.
|
||||
|
||||
For example: `make victoria-metrics-pure DOCKER=podman DOCKER_RUN="podman run --userns=keep-id"`
|
||||
|
||||
Note that `production` builds are not supported via Podman becuase Podman does not support `buildx`.
|
||||
|
||||
## Start with docker-compose
|
||||
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
|
||||
@@ -1168,15 +1152,6 @@ Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-s
|
||||
is the command-line flag value. Snapshots can be archived to backup storage at any time
|
||||
with [vmbackup](https://docs.victoriametrics.com/vmbackup.html).
|
||||
|
||||
Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`.
|
||||
See [this article](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
for more details. This adds some restrictions on what can be done with the contents of `<-storageDataPath>/snapshots` directory:
|
||||
|
||||
- Do not delete subdirectories inside `<-storageDataPath>/snapshots` with `rm` or similar commands, since this will leave some snapshot data undeleted.
|
||||
Prefer using the `/snapshot/delete` API for deleting snapshot. See below for more details about this API.
|
||||
- Do not copy subdirectories inside `<-storageDataPath>/snapshot` with `cp`, `rsync` or similar commands, since there are high chances
|
||||
that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data.
|
||||
|
||||
The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots.
|
||||
|
||||
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
|
||||
@@ -1699,44 +1674,43 @@ See also [cardinality limiter](#cardinality-limiter) and [capacity planning docs
|
||||
|
||||
## High availability
|
||||
|
||||
The general approach for achieving high availability is the following:
|
||||
|
||||
- to run two identically configured VictoriaMetrics instances in distinct datacenters (availability zones)
|
||||
- to store the collected data simultaneously into these instances via [vmagent](https://docs.victoriametrics.com/vmagent.html) or Prometheus
|
||||
- to query the first VictoriaMetrics instance and to fail over to the second instance when the first instance becomes temporarily unavailable.
|
||||
|
||||
Such a setup guarantees that the collected data isn't lost when one of VictoriaMetrics instance becomes unavailable.
|
||||
The collected data continues to be written to the available VictoriaMetrics instance, so it should be available for querying.
|
||||
Both [vmagent](https://docs.victoriametrics.com/vmagent.html) and Prometheus buffer the collected data locally if they cannot send it
|
||||
to the configured remote storage. So the collected data will be written to the temporarily unavailable VictoriaMetrics instance
|
||||
after it becomes available.
|
||||
|
||||
If you use [vmagent](https://docs.victoriametrics.com/vmagent.html) for storing the data into VictoriaMetrics,
|
||||
then it can be configured with multiple `-remoteWrite.url` command-line flags, where every flag points to the VictoriaMetrics
|
||||
instance in a particular availability zone, in order to replicate the collected data to all the VictoriaMetrics instances.
|
||||
For example, the following command instructs `vmagent` to replicate data to `vm-az1` and `vm-az2` instances of VictoriaMetrics:
|
||||
* Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
|
||||
* Pass addresses of these instances to [vmagent](https://docs.victoriametrics.com/vmagent.html) via `-remoteWrite.url` command-line flag:
|
||||
|
||||
```console
|
||||
/path/to/vmagent \
|
||||
-remoteWrite.url=http://<vm-az1>:8428/api/v1/write \
|
||||
-remoteWrite.url=http://<vm-az2>:8428/api/v1/write
|
||||
/path/to/vmagent -remoteWrite.url=http://<victoriametrics-addr-1>:8428/api/v1/write -remoteWrite.url=http://<victoriametrics-addr-2>:8428/api/v1/write
|
||||
```
|
||||
|
||||
If you use Prometheus for collecting and writing the data to VictoriaMetrics,
|
||||
then the following [`remote_write`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section
|
||||
in Prometheus config can be used for replicating the collected data to `vm-az1` and `vm-az2` VictoriaMetrics instances:
|
||||
Alternatively these addresses may be passed to `remote_write` section in Prometheus config:
|
||||
|
||||
```yml
|
||||
remote_write:
|
||||
- url: http://<vm-az1>:8428/api/v1/write
|
||||
- url: http://<vm-az2>:8428/api/v1/write
|
||||
- url: http://<victoriametrics-addr-1>:8428/api/v1/write
|
||||
queue_config:
|
||||
max_samples_per_send: 10000
|
||||
# ...
|
||||
- url: http://<victoriametrics-addr-N>:8428/api/v1/write
|
||||
queue_config:
|
||||
max_samples_per_send: 10000
|
||||
```
|
||||
|
||||
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups,
|
||||
since it uses lower amounts of RAM, CPU and network bandwidth than Prometheus.
|
||||
* Apply the updated config:
|
||||
|
||||
If you use identically configured [vmagent](https://docs.victoriametrics.com/vmagent.html) instances for collecting the same data
|
||||
and sending it to VictoriaMetrics, then do not forget enabling [deduplication](#deduplication) at VictoriaMetrics side.
|
||||
```console
|
||||
kill -HUP `pidof prometheus`
|
||||
```
|
||||
|
||||
It is recommended to use [vmagent](https://docs.victoriametrics.com/vmagent.html) instead of Prometheus for highly loaded setups.
|
||||
|
||||
* Now Prometheus should write data into all the configured `remote_write` urls in parallel.
|
||||
* Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
|
||||
* Set up Prometheus datasource in Grafana that points to Promxy.
|
||||
|
||||
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
|
||||
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
|
||||
|
||||
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
|
||||
with the enabled de-duplication. See [this section](#deduplication) for details.
|
||||
|
||||
## Deduplication
|
||||
|
||||
@@ -1923,23 +1897,8 @@ See how to request a free trial license [here](https://victoriametrics.com/produ
|
||||
|
||||
* `-downsampling.period=30d:5m,180d:1h` instructs VictoriaMetrics to deduplicate samples older than 30 days with 5 minutes interval and to deduplicate samples older than 180 days with 1 hour interval.
|
||||
|
||||
Downsampling is applied independently per each time series and leaves a single [raw sample](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
|
||||
with the biggest [timestamp](https://en.wikipedia.org/wiki/Unix_time) on the configured interval, in the same way as [deduplication](#deduplication) does.
|
||||
It works the best for [counters](https://docs.victoriametrics.com/keyConcepts.html#counter) and [histograms](https://docs.victoriametrics.com/keyConcepts.html#histogram),
|
||||
as their values are always increasing. But downsampling [gauges](https://docs.victoriametrics.com/keyConcepts.html#gauge)
|
||||
and [summaries](https://docs.victoriametrics.com/keyConcepts.html#summary)
|
||||
would mean losing the changes within the downsampling interval. Please note, you can use [recording rules](https://docs.victoriametrics.com/vmalert.html#rules)
|
||||
or [steaming aggregation](https://docs.victoriametrics.com/stream-aggregation.html)
|
||||
to apply custom aggregation functions, like min/max/avg etc., in order to make gauges more resilient to downsampling.
|
||||
|
||||
Downsampling can reduce disk space usage and improve query performance if it is applied to time series with big number
|
||||
of samples per each series. The downsampling doesn't improve query performance if the database contains big number
|
||||
of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)),
|
||||
since downsampling doesn't reduce the number of time series. In this case the majority of query time is spent on searching for the matching time series
|
||||
instead of processing the found samples.
|
||||
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in [vmagent](https://docs.victoriametrics.com/vmagent.html)
|
||||
or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to
|
||||
[reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
|
||||
Downsampling is applied independently per each time series. It can reduce disk space usage and improve query performance if it is applied to time series with big number of samples per each series. The downsampling doesn't improve query performance if the database contains big number of time series with small number of samples per each series (aka [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)), since downsampling doesn't reduce the number of time series. So the majority of time is spent on searching for the matching time series.
|
||||
It is possible to use [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) in vmagent or recording rules in [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to [reduce the number of time series](https://docs.victoriametrics.com/vmalert.html#downsampling-and-aggregation-via-vmalert).
|
||||
|
||||
Downsampling happens during [background merges](https://docs.victoriametrics.com/#storage)
|
||||
and can't be performed if there is not enough of free disk space or if vmstorage
|
||||
@@ -1997,8 +1956,6 @@ VictoriaMetrics provides the following security-related command-line flags:
|
||||
* `-flagsAuthKey` for protecting `/flags` endpoint.
|
||||
* `-pprofAuthKey` for protecting `/debug/pprof/*` endpoints, which can be used for [profiling](#profiling).
|
||||
* `-denyQueryTracing` for disallowing [query tracing](#query-tracing).
|
||||
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
|
||||
and `X-Frame-Options` HTTP response headers.
|
||||
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
@@ -2530,8 +2487,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
```
|
||||
-bigMergeConcurrency int
|
||||
Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
|
||||
-blockcache.missesBeforeCaching int
|
||||
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-configAuthKey string
|
||||
@@ -2552,7 +2507,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-downsampling.period array
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-dryRun
|
||||
Whether to check config files without running VictoriaMetrics. The following config files are checked: -promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
|
||||
@@ -2586,12 +2541,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -2659,8 +2608,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
@@ -2683,9 +2630,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-newrelic.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
@@ -2726,7 +2670,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.config.strictParse
|
||||
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in -promscrape.config file. By default, the checking is disabled. See how to reload -promscrape.config file at https://docs.victoriametrics.com/vmagent.html#configuration-update
|
||||
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
@@ -2809,11 +2753,11 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-relabelConfig string
|
||||
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
|
||||
-retentionFilter array
|
||||
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-retentionPeriod value
|
||||
Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
-retentionTimezoneOffset duration
|
||||
The offset for performing indexdb rotation. If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)
|
||||
-search.cacheTimestampOffset duration
|
||||
@@ -2829,7 +2773,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logQueryMemoryUsage size
|
||||
Log query and increment vm_memory_intensive_queries_total metric each time the query requires more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
@@ -2891,9 +2835,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
|
||||
-search.minStalenessInterval duration
|
||||
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
|
||||
-search.minWindowForInstantRollupOptimization value
|
||||
Enable cache-based optimization for repeated queries to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 6h)
|
||||
-search.noStaleMarkers
|
||||
Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
|
||||
-search.queryStats.lastQueriesCount int
|
||||
@@ -2920,7 +2861,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period
|
||||
-snapshotsMaxAge value
|
||||
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
|
||||
The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
|
||||
@@ -117,6 +117,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"expand-with-exprs", "WITH expressions' tutorial"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"stream-agg", "streaming aggregation status"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"api/v1/status/tsdb", "tsdb status page"},
|
||||
|
||||
@@ -120,10 +120,10 @@ func compressData(s string) string {
|
||||
var bb bytes.Buffer
|
||||
zw := gzip.NewWriter(&bb)
|
||||
if _, err := zw.Write([]byte(s)); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %w", err))
|
||||
panic(fmt.Errorf("unexpected error when compressing data: %s", err))
|
||||
}
|
||||
if err := zw.Close(); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %w", err))
|
||||
panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
|
||||
}
|
||||
return bb.String()
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
|
||||
r.Reset(dataBytes)
|
||||
_, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -29,7 +29,7 @@ func benchmarkParseJSONRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseJSONRequest(data, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -84,7 +84,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
|
||||
err = req.Unmarshal(bb.B)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse request body: %w", err)
|
||||
return 0, fmt.Errorf("cannot parse request body: %s", err)
|
||||
}
|
||||
|
||||
var commonFields []logstorage.Field
|
||||
@@ -97,7 +97,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
|
||||
// Labels are same for all entries in the stream.
|
||||
commonFields, err = parsePromLabels(commonFields[:0], stream.Labels)
|
||||
if err != nil {
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %w", stream.Labels, err)
|
||||
return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %s", stream.Labels, err)
|
||||
}
|
||||
fields := commonFields
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ func benchmarkParseProtobufRequest(b *testing.B, streams, rows, labels int) {
|
||||
for pb.Next() {
|
||||
_, err := parseProtobufRequest(body, func(timestamp int64, fields []logstorage.Field) {})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"files": {
|
||||
"main.css": "./static/css/main.d1313636.css",
|
||||
"main.js": "./static/js/main.1919fefe.js",
|
||||
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
|
||||
"main.css": "./static/css/main.9a224445.css",
|
||||
"main.js": "./static/js/main.02178f4b.js",
|
||||
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
|
||||
"index.html": "./index.html"
|
||||
},
|
||||
"entrypoints": [
|
||||
"static/css/main.d1313636.css",
|
||||
"static/js/main.1919fefe.js"
|
||||
"static/css/main.9a224445.css",
|
||||
"static/js/main.02178f4b.js"
|
||||
]
|
||||
}
|
||||
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
@@ -1 +1 @@
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.1919fefe.js"></script><link href="./static/css/main.d1313636.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.02178f4b.js"></script><link href="./static/css/main.9a224445.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
1
app/vlselect/vmui/static/css/main.9a224445.css
Normal file
1
app/vlselect/vmui/static/css/main.9a224445.css
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
app/vlselect/vmui/static/js/main.02178f4b.js
Normal file
2
app/vlselect/vmui/static/js/main.02178f4b.js
Normal file
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
|
||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
||||
|
||||
/**
|
||||
* @remix-run/router v1.10.0
|
||||
* @remix-run/router v1.7.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router DOM v6.17.0
|
||||
* React Router DOM v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -29,7 +29,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router v6.17.0
|
||||
* React Router v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
File diff suppressed because one or more lines are too long
@@ -1,11 +1,11 @@
|
||||
---
|
||||
sort: 23
|
||||
weight: 23
|
||||
sort: 14
|
||||
weight: 14
|
||||
title: MetricsQL
|
||||
menu:
|
||||
docs:
|
||||
parent: 'victoriametrics'
|
||||
weight: 23
|
||||
parent: "victoriametrics"
|
||||
weight: 14
|
||||
aliases:
|
||||
- /ExtendedPromQL.html
|
||||
- /MetricsQL.html
|
||||
@@ -21,8 +21,7 @@ However, there are some [intentional differences](https://medium.com/@romanhavro
|
||||
|
||||
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
|
||||
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
|
||||
and introduction into [basic querying via MetricsQL](https://docs.victoriametrics.com/keyConcepts.html#metricsql).
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
|
||||
|
||||
The following functionality is implemented differently in MetricsQL compared to PromQL. This improves user experience:
|
||||
|
||||
@@ -110,7 +109,7 @@ The list of MetricsQL features on top of PromQL:
|
||||
* [histogram_quantile](#histogram_quantile) accepts optional third arg - `boundsLabel`.
|
||||
In this case it returns `lower` and `upper` bounds for the estimated percentile.
|
||||
See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`. See also [drop_empty_series](#drop_empty_series).
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`.
|
||||
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
|
||||
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
|
||||
* `WITH` templates. This feature simplifies writing and managing complex queries.
|
||||
@@ -532,7 +531,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
|
||||
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
|
||||
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
||||
|
||||
See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
See also [mad](#mad) and [range_mad](#range_mad).
|
||||
|
||||
#### max_over_time
|
||||
|
||||
@@ -562,18 +561,6 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
|
||||
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
|
||||
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.
|
||||
|
||||
#### outlier_iqr_over_time
|
||||
|
||||
`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
|
||||
if its value is either smaller than the `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.
|
||||
|
||||
The `outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
|
||||
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last 24 hours.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr).
|
||||
|
||||
#### predict_linear
|
||||
|
||||
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
|
||||
@@ -878,7 +865,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
|
||||
|
||||
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
|
||||
### Transform functions
|
||||
@@ -1068,17 +1055,6 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
|
||||
This function is supported by PromQL. See also [rad](#rad).
|
||||
|
||||
#### drop_empty_series
|
||||
|
||||
`drop_empty_series(q)` is a [transform function](#transform-functions), which drops empty series from `q`.
|
||||
|
||||
This function can be used when `default` operator should be applied only to non-empty series. For example,
|
||||
`drop_empty_series(temperature < 30) default 42` returns series, which have at least a single sample smaller than 30 on the selected time range,
|
||||
while filling gaps in the returned series with 42.
|
||||
|
||||
On the other hand `(temperature < 30) default 40` returns all the `temperature` series, even if they have no samples smaller than 30,
|
||||
by replacing all the values bigger or equal to 30 with 40.
|
||||
|
||||
#### end
|
||||
|
||||
`end()` is a [transform function](#transform-functions), which returns the unix timestamp in seconds for the last point.
|
||||
@@ -1615,7 +1591,7 @@ which maps `label` values from `src_*` to `dst*` for all the time series returne
|
||||
which drops time series from `q` with `label` not matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
|
||||
See also [label_mismatch](#label_mismatch).
|
||||
|
||||
#### label_mismatch
|
||||
|
||||
@@ -1623,7 +1599,7 @@ See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
|
||||
which drops time series from `q` with `label` matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_match](#label_match) and [labels_equal](#labels_equal).
|
||||
See also [label_match](#label_match).
|
||||
|
||||
#### label_move
|
||||
|
||||
@@ -1666,30 +1642,23 @@ for the given `label` for every time series returned by `q`.
|
||||
For example, if `label_value(foo, "bar")` is applied to `foo{bar="1.234"}`, then it will return a time series
|
||||
`foo{bar="1.234"}` with `1.234` value. Function will return no data for non-numeric label values.
|
||||
|
||||
#### labels_equal
|
||||
|
||||
`labels_equal(q, "label1", "label2", ...)` is [label manipulation function](#label-manipulation-functions), which returns `q` series with identical values for the listed labels
|
||||
"label1", "label2", etc.
|
||||
|
||||
See also [label_match](#label_match) and [label_mismatch](#label_mismatch).
|
||||
|
||||
#### sort_by_label
|
||||
|
||||
`sort_by_label(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
`sort_by_label(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label_desc](#sort_by_label_desc) and [sort_by_label_numeric](#sort_by_label_numeric).
|
||||
|
||||
#### sort_by_label_desc
|
||||
|
||||
`sort_by_label_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
`sort_by_label_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label](#sort_by_label) and [sort_by_label_numeric_desc](#sort_by_label_numeric_desc).
|
||||
|
||||
#### sort_by_label_numeric
|
||||
|
||||
`sort_by_label_numeric(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
`sort_by_label_numeric(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series
|
||||
in the following order of `bar` label values: `1`, `2`, `15` and `101`.
|
||||
@@ -1698,7 +1667,7 @@ See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_
|
||||
|
||||
#### sort_by_label_numeric_desc
|
||||
|
||||
`sort_by_label_numeric_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
`sort_by_label_numeric_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")`
|
||||
would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
|
||||
@@ -1870,33 +1839,20 @@ This function is supported by PromQL.
|
||||
`mode(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
|
||||
#### outliers_iqr
|
||||
|
||||
`outliers_iqr(q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
|
||||
outside e.g. [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
|
||||
comparing to other time series at the given point, where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.
|
||||
|
||||
The `outliers_iqr()` is useful for detecting anomalous series in the group of series. For example, `outliers_iqr(temperature) by (country)` returns
|
||||
per-country series with anomalous outlier values comparing to the rest of per-country series.
|
||||
|
||||
See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
|
||||
#### outliers_mad
|
||||
|
||||
`outliers_mad(tolerance, q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least
|
||||
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
|
||||
E.g. it returns time series with at least a single point below `median(q) - mad(q)` or a single point above `median(q) + mad(q)`.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).
|
||||
See also [outliersk](#outliersk) and [mad](#mad).
|
||||
|
||||
#### outliersk
|
||||
|
||||
`outliersk(k, q)` is [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
|
||||
out of time series returned by `q`.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
|
||||
See also [outliers_mad](#outliers_mad).
|
||||
|
||||
#### quantile
|
||||
|
||||
@@ -2016,7 +1972,7 @@ See also [bottomk_min](#bottomk_min).
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
This function is useful for detecting anomalies in the group of related time series.
|
||||
|
||||
See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).
|
||||
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
## Subqueries
|
||||
|
||||
@@ -60,7 +60,7 @@ and sending the data to the Prometheus-compatible remote storage:
|
||||
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vmagent)
|
||||
to [single-node VictoriaMetrics](https://docs.victoriametrics.com/) located at `victoria-metrics-host:8428`:
|
||||
|
||||
```bash
|
||||
```console
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
@@ -69,7 +69,7 @@ the data to [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-V
|
||||
|
||||
Example command for scraping Prometheus targets and writing the data to single-node VictoriaMetrics:
|
||||
|
||||
```bash
|
||||
```console
|
||||
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
@@ -110,7 +110,7 @@ additionally to pull-based Prometheus-compatible targets' scraping:
|
||||
|
||||
* Sending `SIGHUP` signal to `vmagent` process:
|
||||
|
||||
```bash
|
||||
```console
|
||||
kill -SIGHUP `pidof vmagent`
|
||||
```
|
||||
|
||||
@@ -173,13 +173,6 @@ by routing outgoing samples for the same time series of [counter](https://docs.v
|
||||
and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) types from top-level `vmagent` instances
|
||||
to the same second-level `vmagent` instance, so they are aggregated properly.
|
||||
|
||||
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
|
||||
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular
|
||||
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
|
||||
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURLLabels`
|
||||
command-line flag. For example, `-remoteWrite.shardByURLLabels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`.
|
||||
|
||||
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
|
||||
|
||||
### Relabeling and filtering
|
||||
@@ -325,7 +318,7 @@ in the `scrape_config_files` section of `-promscrape.config` file. For example,
|
||||
loading scrape configs from all the `*.yml` files under `configs` directory, from `single_scrape_config.yml` local file
|
||||
and from `https://config-server/scrape_config.yml` url:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_config_files:
|
||||
- configs/*.yml
|
||||
- single_scrape_config.yml
|
||||
@@ -335,7 +328,7 @@ scrape_config_files:
|
||||
Every referred file can contain arbitrary number of [supported scrape configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
|
||||
There is no need in specifying top-level `scrape_configs` section in these files. For example:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
- job_name: foo
|
||||
static_configs:
|
||||
- targets: ["vmagent:8429"]
|
||||
@@ -375,7 +368,7 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
|
||||
For example, the following command starts `vmagent`, which adds `{datacenter="foobar"}` label to all the metrics pushed
|
||||
to all the configured remote storage systems (all the `-remoteWrite.url` flag values):
|
||||
|
||||
```bash
|
||||
```
|
||||
/path/to/vmagent -remoteWrite.label=datacenter=foobar ...
|
||||
```
|
||||
|
||||
@@ -503,16 +496,13 @@ with [additional enhancements](#relabeling-enhancements). The relabeling can be
|
||||
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
|
||||
|
||||
* At the `-remoteWrite.urlRelabelConfig` files. This relabeling is used for modifying labels for metrics
|
||||
and for dropping unneeded metrics before sending them to the particular `-remoteWrite.url`.
|
||||
and for dropping unneeded metrics before sending them to a particular `-remoteWrite.url`.
|
||||
|
||||
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
|
||||
|
||||
All the files with relabeling configs can contain special placeholders in the form `%{ENV_VAR}`,
|
||||
which are replaced by the corresponding environment variable values.
|
||||
|
||||
[Streaming aggregation](https://docs.victoriametrics.com/stream-aggregation.html), if configured,
|
||||
is pefrormed after applying all the relabeling stages mentioned above.
|
||||
|
||||
The following articles contain useful information about Prometheus relabeling:
|
||||
|
||||
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html)
|
||||
@@ -743,7 +733,7 @@ stream parsing mode can be explicitly enabled in the following places:
|
||||
|
||||
Examples:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: 'big-federate'
|
||||
stream_parse: true
|
||||
@@ -770,7 +760,7 @@ Each `vmagent` instance in the cluster must use identical `-promscrape.config` f
|
||||
in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster specified via `-promscrape.cluster.membersCount`.
|
||||
For example, the following commands spread scrape targets among a cluster of two `vmagent` instances:
|
||||
|
||||
```text
|
||||
```
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
|
||||
```
|
||||
@@ -782,7 +772,7 @@ By default, each scrape target is scraped only by a single `vmagent` instance in
|
||||
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
|
||||
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
|
||||
|
||||
```text
|
||||
```
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
|
||||
@@ -796,7 +786,7 @@ The `-promscrape.cluster.memberLabel` command-line flag allows specifying a name
|
||||
The value of the `member num` label is set to `-promscrape.cluster.memberNum`. For example, the following config instructs adding `vmagent_instance="0"` label
|
||||
to all the metrics scraped by the given `vmagent` instance:
|
||||
|
||||
```text
|
||||
```
|
||||
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.cluster.memberLabel=vmagent_instance
|
||||
```
|
||||
|
||||
@@ -823,7 +813,7 @@ See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679)
|
||||
`vmagent` supports scraping targets via http, https and socks5 proxies. Proxy address must be specified in `proxy_url` option. For example, the following scrape config instructs
|
||||
target scraping via https proxy at `https://proxy-addr:1234`:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
proxy_url: https://proxy-addr:1234
|
||||
@@ -840,7 +830,7 @@ Proxy can be configured with the following optional settings:
|
||||
|
||||
For example:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
proxy_url: https://proxy-addr:1234
|
||||
@@ -990,7 +980,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at the beginning of some interval,
|
||||
then `scrape_align_interval` option must be used. For example, the following config aligns hourly scrapes to the beginning of hour:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
scrape_interval: 1h
|
||||
@@ -1000,7 +990,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at specific offset, then `scrape_offset` option must be used.
|
||||
For example, the following config instructs `vmagent` to scrape the target at 10 seconds of every minute:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
scrape_configs:
|
||||
- job_name: foo
|
||||
scrape_interval: 1m
|
||||
@@ -1013,14 +1003,14 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out pods with unneeded ports:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
```
|
||||
|
||||
The following relabeling rule may be added to `relabel_configs` section in order to filter out init container pods:
|
||||
|
||||
```yaml
|
||||
```yml
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
@@ -1064,7 +1054,7 @@ For example, `-kafka.consumer.topic.brokers=host1:9092;host2:9092`.
|
||||
The following command starts `vmagent`, which reads metrics in InfluxDB line protocol format from Kafka broker at `localhost:9092`
|
||||
from the topic `metrics-by-telegraf` and sends them to remote storage at `http://localhost:8428/api/v1/write`:
|
||||
|
||||
```bash
|
||||
```console
|
||||
./bin/vmagent -remoteWrite.url=http://localhost:8428/api/v1/write \
|
||||
-kafka.consumer.topic.brokers=localhost:9092 \
|
||||
-kafka.consumer.topic.format=influx \
|
||||
@@ -1087,7 +1077,7 @@ These command-line flags are available only in [enterprise](https://docs.victori
|
||||
which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) page
|
||||
(see `vmutils-...-enterprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
|
||||
|
||||
```text
|
||||
```
|
||||
-kafka.consumer.topic array
|
||||
Kafka topic names for data consumption.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1132,13 +1122,13 @@ Two types of auth are supported:
|
||||
|
||||
* sasl with username and password:
|
||||
|
||||
```bash
|
||||
```console
|
||||
./bin/vmagent -remoteWrite.url=kafka://localhost:9092/?topic=prom-rw&security.protocol=SASL_SSL&sasl.mechanisms=PLAIN -remoteWrite.basicAuth.username=user -remoteWrite.basicAuth.password=password
|
||||
```
|
||||
|
||||
* tls certificates:
|
||||
|
||||
```bash
|
||||
```console
|
||||
./bin/vmagent -remoteWrite.url=kafka://localhost:9092/?topic=prom-rw&security.protocol=SSL -remoteWrite.tlsCAFile=/opt/ca.pem -remoteWrite.tlsCertFile=/opt/cert.pem -remoteWrite.tlsKeyFile=/opt/key.pem
|
||||
```
|
||||
|
||||
@@ -1169,7 +1159,7 @@ The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```bash
|
||||
```console
|
||||
ROOT_IMAGE=scratch make package-vmagent
|
||||
```
|
||||
|
||||
@@ -1197,7 +1187,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```bash
|
||||
```console
|
||||
curl http://0.0.0.0:8429/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
@@ -1207,7 +1197,7 @@ curl http://0.0.0.0:8429/debug/pprof/heap > mem.pprof
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```bash
|
||||
```console
|
||||
curl http://0.0.0.0:8429/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
@@ -1223,7 +1213,7 @@ It is safe sharing the collected profiles from security point of view, since the
|
||||
|
||||
`vmagent` can be fine-tuned with various command-line flags. Run `./vmagent -help` in order to see the full list of these flags with their descriptions and default values:
|
||||
|
||||
```text
|
||||
```
|
||||
./vmagent -help
|
||||
|
||||
vmagent collects metrics data via popular data ingestion protocols and routes them to VictoriaMetrics.
|
||||
@@ -1232,6 +1222,10 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-clients.docker
|
||||
Decides whether a docker container be brought up automatically
|
||||
-clients.semaphore
|
||||
Tells if the job is running on Semaphore
|
||||
-configAuthKey string
|
||||
Authorization key for accessing /config page. It must be passed via authKey query arg
|
||||
-csvTrimTimestamp duration
|
||||
@@ -1269,12 +1263,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -1385,9 +1373,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-newrelic.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentsdbHTTPListenAddr string
|
||||
TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
|
||||
-opentsdbHTTPListenAddr.useProxyProtocol
|
||||
@@ -1426,7 +1411,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
-promscrape.config.strictParse
|
||||
Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields (default true)
|
||||
-promscrape.configCheckInterval duration
|
||||
Interval for checking for changes in -promscrape.config file. By default, the checking is disabled. See how to reload -promscrape.config file at https://docs.victoriametrics.com/vmagent.html#configuration-update
|
||||
Interval for checking for changes in '-promscrape.config' file. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
@@ -1610,9 +1595,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.shardByURL
|
||||
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
|
||||
-remoteWrite.shardByURL.labels array
|
||||
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.significantFigures array
|
||||
|
||||
@@ -42,6 +42,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -228,6 +229,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"stream-agg", "streaming aggregation status"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"-/reload", "reload configuration"},
|
||||
@@ -432,6 +434,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/stream-agg":
|
||||
streamaggr.WriteHumanReadableState(w, r, remotewrite.GetAggregators())
|
||||
return true
|
||||
case "/ready":
|
||||
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
|
||||
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
|
||||
|
||||
@@ -106,15 +106,12 @@ type client struct {
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg, err := authCfg.NewTLSConfig()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize tls config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tlsCfg := authCfg.NewTLSConfig()
|
||||
awsCfg, err := getAWSAPIConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
tr := &http.Transport{
|
||||
DialContext: statDial,
|
||||
@@ -331,25 +328,15 @@ func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
if err != nil && errors.Is(err, io.EOF) {
|
||||
// it is likely connection became stale.
|
||||
// So we do one more attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, _ = c.newRequest(url, body)
|
||||
resp, err = c.hc.Do(req)
|
||||
}
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
return resp, err
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
@@ -375,7 +362,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
if c.awsCfg != nil {
|
||||
sigv4Hash := awsapi.HashHex(body)
|
||||
if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
|
||||
return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
|
||||
// there is no need in retry, request will be rejected by client.Do and retried by code below
|
||||
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
|
||||
}
|
||||
}
|
||||
return req, nil
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -23,7 +24,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -41,9 +41,8 @@ var (
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
|
||||
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems")
|
||||
shardByURLLabels = flag.String("remoteWrite.shardByURL.labels", "", "Comma-separated list of label names for sharding across all the -remoteWrite.url. All labels of timeseries are used by default. "+
|
||||
"See also -remoteWrite.shardByURL and https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
@@ -96,6 +95,8 @@ var (
|
||||
|
||||
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
|
||||
defaultAuthToken = &auth.Token{}
|
||||
|
||||
shardLabelsFilter map[string]struct{}
|
||||
)
|
||||
|
||||
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
|
||||
@@ -120,8 +121,6 @@ func InitSecretFlags() {
|
||||
}
|
||||
}
|
||||
|
||||
var shardByURLLabelsMap map[string]struct{}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
@@ -158,13 +157,6 @@ func Init() {
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
if len(*shardByURLLabels) > 0 {
|
||||
m := make(map[string]struct{}, len(*shardByURLLabels))
|
||||
for _, label := range *shardByURLLabels {
|
||||
m[label] = struct{}{}
|
||||
}
|
||||
shardByURLLabelsMap = m
|
||||
}
|
||||
initLabelsGlobal()
|
||||
|
||||
// Register SIGHUP handler for config reload before loadRelabelConfigs.
|
||||
@@ -184,6 +176,12 @@ func Init() {
|
||||
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
|
||||
}
|
||||
|
||||
if *shardByURLLabels != "" {
|
||||
for _, label := range strings.Split(*shardByURLLabels, ",") {
|
||||
shardLabelsFilter[strings.TrimSpace(label)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// Start config reloader.
|
||||
configReloaderWG.Add(1)
|
||||
go func() {
|
||||
@@ -431,23 +429,11 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
|
||||
if *shardByURL {
|
||||
// Shard the data among rwctxs
|
||||
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
|
||||
tmpLabels := promutils.GetLabels()
|
||||
for _, ts := range tssBlock {
|
||||
hashLabels := ts.Labels
|
||||
if len(shardByURLLabelsMap) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLLabelsMap[label.Name]; ok {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
h := getLabelsHash(hashLabels)
|
||||
h := getLabelsHash(ts.Labels, shardLabelsFilter)
|
||||
idx := h % uint64(len(tssByURL))
|
||||
tssByURL[idx] = append(tssByURL[idx], ts)
|
||||
}
|
||||
promutils.PutLabels(tmpLabels)
|
||||
|
||||
// Push sharded data to remote storages in parallel in order to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
@@ -497,7 +483,7 @@ func limitSeriesCardinality(tss []prompbmarshal.TimeSeries) []prompbmarshal.Time
|
||||
dst := make([]prompbmarshal.TimeSeries, 0, len(tss))
|
||||
for i := range tss {
|
||||
labels := tss[i].Labels
|
||||
h := getLabelsHash(labels)
|
||||
h := getLabelsHash(labels, nil)
|
||||
if hourlySeriesLimiter != nil && !hourlySeriesLimiter.Add(h) {
|
||||
hourlySeriesLimitRowsDropped.Add(len(tss[i].Samples))
|
||||
logSkippedSeries(labels, "-remoteWrite.maxHourlySeries", hourlySeriesLimiter.MaxItems())
|
||||
@@ -521,10 +507,16 @@ var (
|
||||
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
|
||||
)
|
||||
|
||||
func getLabelsHash(labels []prompbmarshal.Label) uint64 {
|
||||
func getLabelsHash(labels []prompbmarshal.Label, filterLabels map[string]struct{}) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
for _, label := range labels {
|
||||
if len(filterLabels) > 0 {
|
||||
_, ok := filterLabels[label.Name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, label.Value...)
|
||||
}
|
||||
@@ -827,3 +819,23 @@ func CheckStreamAggrConfigs() error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetAggregators() map[string]*streamaggr.Aggregators {
|
||||
var result = map[string]*streamaggr.Aggregators{}
|
||||
|
||||
if len(*remoteWriteMultitenantURLs) > 0 {
|
||||
rwctxsMapLock.Lock()
|
||||
for tenant, rwctxs := range rwctxsMap {
|
||||
for rwNum, rw := range rwctxs {
|
||||
result[fmt.Sprintf("rw %d for tenant %v:%v", rwNum, tenant.AccountID, tenant.ProjectID)] = rw.sas.Load()
|
||||
}
|
||||
}
|
||||
rwctxsMapLock.Unlock()
|
||||
} else {
|
||||
for rwNum, rw := range rwctxsDefault {
|
||||
result[fmt.Sprintf("remote write %d", rwNum)] = rw.sas.Load()
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 8429
|
||||
ENTRYPOINT ["/vmalert-tool-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vmalert-tool-linux-${TARGETARCH}-prod ./vmalert-tool-prod
|
||||
@@ -119,13 +119,6 @@ name: <string>
|
||||
# `eval_offset` can't be bigger than `interval`.
|
||||
[ eval_offset: <duration> ]
|
||||
|
||||
# Optional
|
||||
# Adjust the `time` parameter of group evaluation requests to compensate intentional query delay from the datasource.
|
||||
# By default, the value is inherited from the `-rule.evalDelay` cmd-line flag - see its description for details.
|
||||
# If group has `latency_offset` set in `params`, then it is recommended to set `eval_delay` equal to `latency_offset`.
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 and https://docs.victoriametrics.com/keyConcepts.html#query-latency.
|
||||
[ eval_delay: <duration> ]
|
||||
|
||||
# Limit the number of alerts an alerting rule and series a recording
|
||||
# rule can produce. 0 is no limit.
|
||||
[ limit: <int> | default = 0 ]
|
||||
@@ -801,7 +794,9 @@ Try the following recommendations to reduce the chance of hitting the data delay
|
||||
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution). For example,
|
||||
if expression is `rate(my_metric[2m]) > 0` then ensure that `my_metric` resolution is at least `1m` or better `30s`.
|
||||
If you use VictoriaMetrics as datasource, `[duration]` can be omitted and VictoriaMetrics will adjust it automatically.
|
||||
* Extend `[duration]` in expr to help tolerate the delay. For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
|
||||
* If you know in advance, that data in datasource is delayed - try changing vmalerts `-datasource.lookback`
|
||||
command-line flag to add a time shift for evaluations. Or extend `[duration]` to tolerate the delay.
|
||||
For example, `max_over_time(errors_total[10m]) > 0` will be active even if there is no data in datasource for last `9m`.
|
||||
* If [time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution)
|
||||
in datasource is inconsistent or `>=5min` - try changing vmalerts `-datasource.queryStep` command-line flag to specify
|
||||
how far search query can lookback for the recent datapoint. The recommendation is to have the step
|
||||
@@ -809,12 +804,9 @@ at least two times bigger than the resolution.
|
||||
|
||||
> Please note, data delay is inevitable in distributed systems. And it is better to account for it instead of ignoring.
|
||||
|
||||
By default, recently written samples to VictoriaMetrics [aren't visible for queries](https://docs.victoriametrics.com/keyConcepts.html#query-latency)
|
||||
for up to 30s (see `-search.latencyOffset` command-line flag at vmselect or VictoriaMetrics single-node). Such delay is needed to eliminate risk of
|
||||
incomplete data on the moment of querying, due to chance that metrics collectors won't be able to deliver that data in time.
|
||||
To compensate the latency in timestamps for produced evaluation results, `-rule.evalDelay` is also set to `30s` by default.
|
||||
If you expect data to be delayed for longer intervals (it gets buffered, queued, or just network is slow sometimes)
|
||||
- consider increasing the `-rule.evalDelay` value accordingly.
|
||||
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s
|
||||
(see `-search.latencyOffset` command-line flag at vmselect). Such delay is needed to eliminate risk of incomplete
|
||||
data on the moment of querying, since metrics collectors won't be able to deliver the data in time.
|
||||
|
||||
### Alerts state
|
||||
|
||||
@@ -977,7 +969,7 @@ The shortlist of configuration flags is the following:
|
||||
-datasource.headers string
|
||||
Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. For example, -datasource.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -datasource.url. Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'
|
||||
-datasource.lookback duration
|
||||
Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
-datasource.maxIdleConnections int
|
||||
Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state. (default 100)
|
||||
-datasource.oauth2.clientID string
|
||||
@@ -1045,12 +1037,6 @@ The shortlist of configuration flags is the following:
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
@@ -1231,7 +1217,7 @@ The shortlist of configuration flags is the following:
|
||||
-remoteWrite.bearerTokenFile string
|
||||
Optional path to bearer token file to use for -remoteWrite.url.
|
||||
-remoteWrite.concurrency int
|
||||
Defines number of writers for concurrent writing into remote write endpoint (default 1)
|
||||
Defines number of writers for concurrent writing into remote querier (default 1)
|
||||
-remoteWrite.disablePathAppend
|
||||
Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.
|
||||
-remoteWrite.flushInterval duration
|
||||
@@ -1301,14 +1287,13 @@ The shortlist of configuration flags is the following:
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.evalDelay time
|
||||
Adjustment of the time parameter for rule evaluation requests to compensate intentional data delay from the datasource.Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect). (default 30s)
|
||||
-rule.maxResolveDuration duration
|
||||
Limits the maxiMum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group
|
||||
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group.
|
||||
-rule.resendDelay duration
|
||||
MiniMum amount of time to wait before resending an alert to notifier
|
||||
Minimum amount of time to wait before resending an alert to notifier
|
||||
-rule.templates array
|
||||
Path or glob pattern to location with go template definitions for rules annotations templating. Flag can be specified multiple times.
|
||||
Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
@@ -1397,11 +1382,8 @@ For example:
|
||||
```yaml
|
||||
static_configs:
|
||||
- targets:
|
||||
# support using full url
|
||||
- 'http://alertmanager:9093/test/api/v2/alerts'
|
||||
- 'https://alertmanager:9093/api/v2/alerts'
|
||||
# the following target with only host:port will be used as <scheme>://localhost:9093/<path_prefix>/api/v2/alerts
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
|
||||
consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
|
||||
@@ -19,14 +19,11 @@ import (
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
type Group struct {
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutils.Duration `yaml:"eval_delay,omitempty"`
|
||||
Type Type `yaml:"type,omitempty"`
|
||||
File string
|
||||
Name string `yaml:"name"`
|
||||
Interval *promutils.Duration `yaml:"interval,omitempty"`
|
||||
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
@@ -236,7 +233,7 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
return parse(files, validateTplFn, validateExpressions)
|
||||
}
|
||||
@@ -245,11 +242,11 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
|
||||
func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
|
||||
files, err := readFromFS(pathPatterns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read from the config: %w", err)
|
||||
return nil, fmt.Errorf("failed to read from the config: %s", err)
|
||||
}
|
||||
groups, err := parse(files, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
|
||||
return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
|
||||
@@ -106,7 +106,7 @@ func TestParseBad(t *testing.T) {
|
||||
},
|
||||
{
|
||||
[]string{"http://unreachable-url"},
|
||||
"failed to",
|
||||
"failed to read",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
|
||||
@@ -49,7 +49,7 @@ func (fs *FS) Read(files []string) (map[string][]byte, error) {
|
||||
path, resp.StatusCode, http.StatusOK, data)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read %q: %w", path, err)
|
||||
return nil, fmt.Errorf("cannot read %q: %s", path, err)
|
||||
}
|
||||
result[path] = data
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ groups:
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
type: prometheus
|
||||
eval_delay: 30s
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by (instance) > 1
|
||||
|
||||
@@ -43,9 +43,7 @@ var (
|
||||
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset" at datasource side `+
|
||||
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
|
||||
`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
@@ -85,10 +83,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
if !*queryTimeAlignment {
|
||||
logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
|
||||
}
|
||||
if *lookBack != 0 {
|
||||
logger.Warnf("flag `-datasource.lookback` will be deprecated soon. Please use `-rule.evalDelay` command-line flag instead. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
|
||||
logger.Warnf("flag `datasource.queryTimeAlignment` is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead")
|
||||
}
|
||||
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
@@ -118,7 +113,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
}
|
||||
_, err = authCfg.GetAuthHeader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
|
||||
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %s", *addr, err)
|
||||
}
|
||||
|
||||
return &VMStorage{
|
||||
|
||||
@@ -142,30 +142,24 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Resu
|
||||
return Result{}, nil, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return Result{}, nil, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRequest(query, ts)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
req, _ = s.newQueryRequest(query, ts)
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return Result{}, req, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
// Process the received response.
|
||||
parseFn := parsePrometheusResponse
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
parseFn = parseGraphiteResponse
|
||||
}
|
||||
result, err := parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return result, req, err
|
||||
}
|
||||
|
||||
@@ -184,30 +178,22 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
|
||||
}
|
||||
req, err := s.newQueryRangeRequest(query, start, end)
|
||||
if err != nil {
|
||||
return res, err
|
||||
return Result{}, err
|
||||
}
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// Return unexpected error to the caller.
|
||||
return res, err
|
||||
}
|
||||
// Something in the middle between client and datasource might be closing
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// something in the middle between client and datasource might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
req, err = s.newQueryRangeRequest(query, start, end)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
req, _ = s.newQueryRangeRequest(query, start, end)
|
||||
resp, err = s.do(ctx, req)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Process the received response.
|
||||
res, err = parsePrometheusResponse(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return res, err
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
return parsePrometheusResponse(req, resp)
|
||||
}
|
||||
|
||||
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
|
||||
@@ -233,7 +219,7 @@ func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response,
|
||||
func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
|
||||
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %s", s.datasourceURL, err)
|
||||
}
|
||||
s.setPrometheusRangeReqParams(req, query, start, end)
|
||||
return req, nil
|
||||
@@ -242,7 +228,7 @@ func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*h
|
||||
func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
|
||||
req, err := s.newRequest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
|
||||
return nil, fmt.Errorf("cannot create query request to datasource %q: %s", s.datasourceURL, err)
|
||||
}
|
||||
switch s.dataSourceType {
|
||||
case "", datasourcePrometheus:
|
||||
|
||||
@@ -112,14 +112,14 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
|
||||
}
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
var pi promInstant
|
||||
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
|
||||
@@ -47,8 +47,8 @@ all files with prefix rule_ in folder dir.
|
||||
See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
|
||||
`)
|
||||
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions `+
|
||||
`for rules annotations templating. Flag can be specified multiple times.
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
@@ -230,9 +230,7 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
|
||||
}
|
||||
if rw != nil {
|
||||
manager.rw = rw
|
||||
}
|
||||
manager.rw = rw
|
||||
|
||||
rr, err := remoteread.Init()
|
||||
if err != nil {
|
||||
|
||||
@@ -3,6 +3,7 @@ package notifier
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"gopkg.in/yaml.v2"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
@@ -10,8 +11,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
@@ -143,23 +142,26 @@ func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (stri
|
||||
if labels.Len() == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
scheme := labels.Get("__scheme__")
|
||||
if len(scheme) == 0 {
|
||||
scheme = "http"
|
||||
schemeRelabeled := labels.Get("__scheme__")
|
||||
if len(schemeRelabeled) == 0 {
|
||||
schemeRelabeled = "http"
|
||||
}
|
||||
alertsPath := labels.Get("__alerts_path__")
|
||||
if !strings.HasPrefix(alertsPath, "/") {
|
||||
alertsPath = "/" + alertsPath
|
||||
}
|
||||
address := labels.Get("__address__")
|
||||
if len(address) == 0 {
|
||||
addressRelabeled := labels.Get("__address__")
|
||||
if len(addressRelabeled) == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
address = addMissingPort(scheme, address)
|
||||
u := fmt.Sprintf("%s://%s%s", scheme, address, alertsPath)
|
||||
if strings.Contains(addressRelabeled, "/") {
|
||||
return "", nil, nil
|
||||
}
|
||||
addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
|
||||
alertsPathRelabeled := labels.Get("__alerts_path__")
|
||||
if !strings.HasPrefix(alertsPathRelabeled, "/") {
|
||||
alertsPathRelabeled = "/" + alertsPathRelabeled
|
||||
}
|
||||
u := fmt.Sprintf("%s://%s%s", schemeRelabeled, addressRelabeled, alertsPathRelabeled)
|
||||
if _, err := url.Parse(u); err != nil {
|
||||
return "", nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q, metrics_path=%q (%q): %w",
|
||||
u, cfg.Scheme, scheme, target, address, alertsPath, err)
|
||||
u, cfg.Scheme, schemeRelabeled, target, addressRelabeled, alertsPathRelabeled, err)
|
||||
}
|
||||
return u, labels, nil
|
||||
}
|
||||
@@ -179,24 +181,9 @@ func addMissingPort(scheme, target string) string {
|
||||
func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
||||
m := promutils.NewLabels(3 + metaLabels.Len())
|
||||
address := target
|
||||
scheme := cfg.Scheme
|
||||
alertsPath := path.Join("/", cfg.PathPrefix, alertManagerPath)
|
||||
// try to extract optional scheme and alertsPath from __address__.
|
||||
if strings.HasPrefix(address, "http://") {
|
||||
scheme = "http"
|
||||
address = address[len("http://"):]
|
||||
} else if strings.HasPrefix(address, "https://") {
|
||||
scheme = "https"
|
||||
address = address[len("https://"):]
|
||||
}
|
||||
if n := strings.IndexByte(address, '/'); n >= 0 {
|
||||
alertsPath = address[n:]
|
||||
address = address[:n]
|
||||
}
|
||||
m.Add("__address__", address)
|
||||
m.Add("__scheme__", scheme)
|
||||
m.Add("__alerts_path__", alertsPath)
|
||||
m.Add("__address__", target)
|
||||
m.Add("__scheme__", cfg.Scheme)
|
||||
m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
|
||||
m.AddFrom(metaLabels)
|
||||
return m
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ func (cw *configWatcher) reload(path string) error {
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
return fmt.Errorf("failed to init notifier for %q: %s", typeK, err)
|
||||
}
|
||||
|
||||
cw.setTargets(typeK, targets)
|
||||
@@ -107,7 +107,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
}
|
||||
updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
logger.Errorf("failed to init notifier for %q: %s", typeK, err)
|
||||
}
|
||||
cw.setTargets(typeK, updateTargets)
|
||||
}
|
||||
@@ -118,7 +118,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %s", err)}
|
||||
}
|
||||
var targets []Target
|
||||
var errors []error
|
||||
@@ -167,11 +167,11 @@ func (cw *configWatcher) start() error {
|
||||
for _, target := range cfg.Targets {
|
||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||
return fmt.Errorf("failed to parse labels for target %q: %s", target, err)
|
||||
}
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %s", address, err)
|
||||
}
|
||||
targets = append(targets, Target{
|
||||
Notifier: notifier,
|
||||
@@ -189,14 +189,14 @@ func (cw *configWatcher) start() error {
|
||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, fmt.Errorf("got labels err: %s", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||
return fmt.Errorf("failed to start consulSD discovery: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,14 +207,14 @@ func (cw *configWatcher) start() error {
|
||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, fmt.Errorf("got labels err: %s", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -318,47 +318,3 @@ func TestMergeHTTPClientConfigs(t *testing.T) {
|
||||
t.Fatalf("expected BasicAuth tp be present")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLabels(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
target string
|
||||
cfg *Config
|
||||
expectedAddress string
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
"invalid address",
|
||||
"invalid:*//url",
|
||||
&Config{},
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"use some default params",
|
||||
"alertmanager:9093",
|
||||
&Config{PathPrefix: "test"},
|
||||
"http://alertmanager:9093/test/api/v2/alerts",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"use target address",
|
||||
"https://alertmanager:9093/api/v1/alerts",
|
||||
&Config{Scheme: "http", PathPrefix: "test"},
|
||||
"https://alertmanager:9093/api/v1/alerts",
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
address, _, err := parseLabels(tc.target, nil, tc.cfg)
|
||||
if err == nil == tc.expectedErr {
|
||||
t.Fatalf("unexpected error; got %t; want %t", err != nil, tc.expectedErr)
|
||||
}
|
||||
if address != tc.expectedAddress {
|
||||
t.Fatalf("unexpected address; got %q; want %q", address, tc.expectedAddress)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,7 +90,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
externalLabels = extLabels
|
||||
eu, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||
return nil, fmt.Errorf("failed to parse external URL: %s", err)
|
||||
}
|
||||
|
||||
templates.UpdateWithFuncs(templates.FuncsWithExternalURL(eu))
|
||||
@@ -116,7 +116,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
if len(*addrs) > 0 {
|
||||
notifiers, err := notifiersFromFlags(gen)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %s", err)
|
||||
}
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
return notifiers
|
||||
@@ -126,7 +126,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
|
||||
|
||||
cw, err = newWatcher(*configPath, gen)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||
return nil, fmt.Errorf("failed to init config watcher: %s", err)
|
||||
}
|
||||
return cw.notifiers, nil
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
- https://localhost:9093/test/api/v2/alerts
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: bar
|
||||
|
||||
@@ -3,7 +3,6 @@ package remotewrite
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -118,19 +117,12 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
// Push adds timeseries into queue for writing into remote storage.
|
||||
// Push returns and error if client is stopped or if queue is full.
|
||||
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
||||
rwTotal.Inc()
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
droppedBytes.Add(s.Size())
|
||||
return fmt.Errorf("client is closed")
|
||||
case c.input <- s:
|
||||
return nil
|
||||
default:
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(s.Samples))
|
||||
droppedBytes.Add(s.Size())
|
||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
||||
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
||||
c.maxQueueSize)
|
||||
@@ -189,14 +181,11 @@ func (c *Client) run(ctx context.Context) {
|
||||
}
|
||||
|
||||
var (
|
||||
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
|
||||
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
||||
@@ -233,11 +222,6 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
L:
|
||||
for attempts := 0; ; attempts++ {
|
||||
err := c.send(ctx, b)
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Something in the middle between client and destination might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
err = c.send(ctx, b)
|
||||
}
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(b))
|
||||
@@ -275,7 +259,6 @@ L:
|
||||
|
||||
}
|
||||
|
||||
rwErrors.Inc()
|
||||
droppedRows.Add(len(wr.Timeseries))
|
||||
droppedBytes.Add(len(b))
|
||||
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||
@@ -299,9 +282,7 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
if c.authCfg != nil {
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return &nonRetriableError{
|
||||
err: err,
|
||||
}
|
||||
return &nonRetriableError{err: err}
|
||||
}
|
||||
}
|
||||
if !*disablePathAppend {
|
||||
@@ -320,14 +301,13 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
// Prometheus remote Write compatible receivers MUST
|
||||
switch resp.StatusCode / 100 {
|
||||
case 2:
|
||||
// respond with HTTP 2xx status code when write is successful.
|
||||
// respond with a HTTP 2xx status code when the write is successful.
|
||||
return nil
|
||||
case 4:
|
||||
if resp.StatusCode != http.StatusTooManyRequests {
|
||||
// MUST NOT retry write requests on HTTP 4xx responses other than 429
|
||||
return &nonRetriableError{
|
||||
err: fmt.Errorf("unexpected response code %d for %s. Response body %q", resp.StatusCode, req.URL.Redacted(), body),
|
||||
}
|
||||
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||
resp.StatusCode, req.URL.Redacted(), body)}
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
|
||||
@@ -30,7 +30,7 @@ var (
|
||||
|
||||
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines max number of timeseries to be flushed at once")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote write endpoint")
|
||||
concurrency = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
|
||||
|
||||
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
|
||||
|
||||
@@ -36,11 +36,11 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
}
|
||||
tFrom, err := time.Parse(time.RFC3339, *replayFrom)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %q: %w", *replayFrom, err)
|
||||
return fmt.Errorf("failed to parse %q: %s", *replayFrom, err)
|
||||
}
|
||||
tTo, err := time.Parse(time.RFC3339, *replayTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %q: %w", *replayTo, err)
|
||||
return fmt.Errorf("failed to parse %q: %s", *replayTo, err)
|
||||
}
|
||||
if !tTo.After(tFrom) {
|
||||
return fmt.Errorf("replay.timeTo must be bigger than replay.timeFrom")
|
||||
|
||||
@@ -91,7 +91,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
entries: make([]StateEntry, entrySize),
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
@@ -269,7 +269,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.processed[k] = v
|
||||
@@ -295,33 +295,24 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
}
|
||||
|
||||
// execRange executes alerting rule on the given time range similarly to exec.
|
||||
// When making consecutive calls make sure to respect time linearity for start and end params,
|
||||
// as this function modifies AlertingRule alerts state.
|
||||
// It is not thread safe.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as a result.
|
||||
// It doesn't update internal states of the Rule and meant to be used just
|
||||
// to get time series for backfilling.
|
||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result []prompbmarshal.TimeSeries
|
||||
holdAlertState := make(map[uint64]*notifier.Alert)
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
|
||||
}
|
||||
for _, s := range res.Data {
|
||||
ls, err := ar.toLabels(s, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, fmt.Errorf("failed to create alert: %s", err)
|
||||
}
|
||||
|
||||
// if alert is instant, For: 0
|
||||
if ar.For == 0 {
|
||||
if ar.For == 0 { // if alert is instant
|
||||
a.State = notifier.StateFiring
|
||||
for i := range s.Values {
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
@@ -333,32 +324,18 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
at := time.Unix(s.Timestamps[i], 0)
|
||||
// try to restore alert's state on the first iteration
|
||||
if at.Equal(start) {
|
||||
if _, ok := ar.alerts[h]; ok {
|
||||
a = ar.alerts[h]
|
||||
prevT = at
|
||||
}
|
||||
}
|
||||
if at.Sub(prevT) > ar.EvalInterval {
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
a.Start = time.Time{}
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For {
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = at
|
||||
}
|
||||
prevT = at
|
||||
result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
|
||||
|
||||
// save alert's state on last iteration, so it can be used on the next execRange call
|
||||
if at.Equal(end) {
|
||||
holdAlertState[h] = a
|
||||
}
|
||||
}
|
||||
}
|
||||
ar.alerts = holdAlertState
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -411,7 +388,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
for _, m := range res.Data {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.Err = fmt.Errorf("failed to expand labels: %w", err)
|
||||
curState.Err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
@@ -536,7 +513,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
if ls == nil {
|
||||
ls, err = ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
}
|
||||
a := ¬ifier.Alert{
|
||||
@@ -614,41 +591,44 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
return nil
|
||||
}
|
||||
|
||||
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
|
||||
if !*disableAlertGroupLabel {
|
||||
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
|
||||
}
|
||||
var labelsFilter string
|
||||
for k, v := range ar.Labels {
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
if len(res.Data) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
return nil
|
||||
}
|
||||
for _, series := range res.Data {
|
||||
series.DelLabel("__name__")
|
||||
labelSet := make(map[string]string, len(series.Labels))
|
||||
for _, v := range series.Labels {
|
||||
labelSet[v.Name] = v.Value
|
||||
}
|
||||
id := hash(labelSet)
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, a := range ar.alerts {
|
||||
if a.Restored || a.State != notifier.StatePending {
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(series.Values[0]), 0)
|
||||
|
||||
var labelsFilter []string
|
||||
for k, v := range a.Labels {
|
||||
labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
|
||||
}
|
||||
sort.Strings(labelsFilter)
|
||||
expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
|
||||
alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
|
||||
|
||||
ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
qMetrics := res.Data
|
||||
if len(qMetrics) < 1 {
|
||||
ar.logDebugf(ts, nil, "no response was received from restore query")
|
||||
continue
|
||||
}
|
||||
|
||||
// only one series expected in response
|
||||
m := qMetrics[0]
|
||||
// __name__ supposed to be alertForStateMetricName
|
||||
m.DelLabel("__name__")
|
||||
|
||||
// we assume that restore query contains all label matchers,
|
||||
// so all received labels will match anyway if their number is equal.
|
||||
if len(m.Labels) != len(a.Labels) {
|
||||
ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
|
||||
continue
|
||||
}
|
||||
a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
|
||||
a.Restored = true
|
||||
logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
|
||||
}
|
||||
|
||||
@@ -346,18 +346,15 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
testCases := []struct {
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
expHoldAlertStateAlerts map[uint64]*notifier.Alert
|
||||
rule *AlertingRule
|
||||
data []datasource.Metric
|
||||
expAlerts []*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestAlertingRule("empty", 0),
|
||||
[]datasource.Metric{},
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("empty labels", 0),
|
||||
@@ -367,7 +364,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing", 0),
|
||||
@@ -380,7 +376,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
State: notifier.StateFiring,
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("single-firing-on-range", 0),
|
||||
@@ -392,7 +387,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring},
|
||||
{State: notifier.StateFiring},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-pending", time.Second),
|
||||
@@ -404,16 +398,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-pending",
|
||||
Labels: map[string]string{"alertname": "for-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-firing", 3*time.Second),
|
||||
@@ -425,38 +409,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-firing",
|
||||
Labels: map[string]string{"alertname": "for-firing"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for-hold-pending", time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
|
||||
},
|
||||
[]*notifier.Alert{
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "for-hold-pending",
|
||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: time.Second,
|
||||
}},
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
||||
@@ -470,10 +422,9 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
newTestAlertingRule("multi-series", 3*time.Second),
|
||||
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
|
||||
[]datasource.Metric{
|
||||
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
||||
{
|
||||
@@ -485,6 +436,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
||||
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
||||
//
|
||||
{
|
||||
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
||||
Labels: map[string]string{
|
||||
@@ -498,29 +450,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{"alertname": "multi-series"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
|
||||
GroupID: fakeGroup.ID(),
|
||||
Name: "multi-series",
|
||||
Labels: map[string]string{"alertname": "multi-series", "foo": "bar"},
|
||||
Annotations: map[string]string{},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
For: 3 * time.Second,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
|
||||
@@ -548,16 +477,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
"source": "vm",
|
||||
}},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
tc.rule.q = fq
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
fq.Add(tc.data...)
|
||||
gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
|
||||
gotTS, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
@@ -583,11 +512,6 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
|
||||
}
|
||||
}
|
||||
if tc.expHoldAlertStateAlerts != nil {
|
||||
if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
|
||||
t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -640,9 +564,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
if got.ActiveAt != exp.ActiveAt {
|
||||
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
||||
}
|
||||
if got.Name != exp.Name {
|
||||
t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -658,7 +579,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
@@ -672,7 +592,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -680,7 +599,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
||||
@@ -688,11 +607,9 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -710,11 +627,9 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "bar",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -727,7 +642,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
@@ -740,7 +654,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
@@ -753,7 +666,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -31,9 +31,7 @@ var (
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the `time` parameter for rule evaluation requests to compensate intentional data delay from the datasource."+
|
||||
"Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect).")
|
||||
"which by default is 4 times evaluationInterval of the parent ")
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
@@ -41,16 +39,13 @@ var (
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
type Group struct {
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
// EvalDelay will adjust timestamp for rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *time.Duration
|
||||
mu sync.RWMutex
|
||||
Name string
|
||||
File string
|
||||
Rules []Rule
|
||||
Type config.Type
|
||||
Interval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
Limit int
|
||||
Concurrency int
|
||||
Checksum string
|
||||
@@ -144,9 +139,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
if cfg.EvalOffset != nil {
|
||||
g.EvalOffset = &cfg.EvalOffset.D
|
||||
}
|
||||
if cfg.EvalDelay != nil {
|
||||
g.EvalDelay = &cfg.EvalDelay.D
|
||||
}
|
||||
for _, h := range cfg.Headers {
|
||||
g.Headers[h.Key] = h.Value
|
||||
}
|
||||
@@ -339,7 +331,7 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
@@ -528,7 +520,7 @@ func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
if len(g.Rules) < 1 {
|
||||
return nil
|
||||
@@ -589,14 +581,10 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
// to 10:30, to the previous evaluationInterval.
|
||||
return ts.Add(-g.Interval)
|
||||
}
|
||||
// when `eval_offset` is using, ts shouldn't be effect by `eval_alignment` and `eval_delay`
|
||||
// since it should be always aligned.
|
||||
// EvalOffset shouldn't interfere with evalAlignment,
|
||||
// so we return it immediately
|
||||
return ts
|
||||
}
|
||||
|
||||
timestamp = timestamp.Add(-g.getEvalDelay())
|
||||
|
||||
// always apply the alignment as a last step
|
||||
if g.evalAlignment == nil || *g.evalAlignment {
|
||||
// align query time with interval to get similar result with grafana when plotting time series.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
@@ -606,13 +594,6 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
return timestamp
|
||||
}
|
||||
|
||||
func (g *Group) getEvalDelay() time.Duration {
|
||||
if g.EvalDelay != nil {
|
||||
return *g.EvalDelay
|
||||
}
|
||||
return *evalDelay
|
||||
}
|
||||
|
||||
// executor contains group's notify and rw configs
|
||||
type executor struct {
|
||||
Notifiers func() []notifier.Notifier
|
||||
@@ -621,11 +602,11 @@ type executor struct {
|
||||
Rw remotewrite.RWClient
|
||||
|
||||
previouslySentSeriesToRWMu sync.Mutex
|
||||
// previouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
// PreviouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||
// map[ruleID]map[ruleLabels][]prompb.Label
|
||||
// where `ruleID` is ID of the Rule within a Group
|
||||
// and `ruleLabels` is []prompb.Label marshalled to a string
|
||||
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||
PreviouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||
}
|
||||
|
||||
// execConcurrently executes rules concurrently if concurrency>1
|
||||
@@ -663,6 +644,9 @@ var (
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
)
|
||||
|
||||
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
||||
@@ -683,7 +667,9 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
||||
var lastErr error
|
||||
for _, ts := range tss {
|
||||
remoteWriteTotal.Inc()
|
||||
if err := e.Rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
|
||||
}
|
||||
}
|
||||
@@ -737,7 +723,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
|
||||
var staleS []prompbmarshal.TimeSeries
|
||||
// check whether there are series which disappeared and need to be marked as stale
|
||||
e.previouslySentSeriesToRWMu.Lock()
|
||||
for key, labels := range e.previouslySentSeriesToRW[rID] {
|
||||
for key, labels := range e.PreviouslySentSeriesToRW[rID] {
|
||||
if _, ok := ruleLabels[key]; ok {
|
||||
continue
|
||||
}
|
||||
@@ -746,7 +732,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
|
||||
staleS = append(staleS, ss)
|
||||
}
|
||||
// set previous series to current
|
||||
e.previouslySentSeriesToRW[rID] = ruleLabels
|
||||
e.PreviouslySentSeriesToRW[rID] = ruleLabels
|
||||
e.previouslySentSeriesToRWMu.Unlock()
|
||||
|
||||
return staleS
|
||||
@@ -764,14 +750,14 @@ func (e *executor) purgeStaleSeries(activeRules []Rule) {
|
||||
|
||||
for _, rule := range activeRules {
|
||||
id := rule.ID()
|
||||
prev, ok := e.previouslySentSeriesToRW[id]
|
||||
prev, ok := e.PreviouslySentSeriesToRW[id]
|
||||
if ok {
|
||||
// keep previous series for staleness detection
|
||||
newPreviouslySentSeriesToRW[id] = prev
|
||||
}
|
||||
}
|
||||
e.previouslySentSeriesToRW = nil
|
||||
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
||||
e.PreviouslySentSeriesToRW = nil
|
||||
e.PreviouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
||||
|
||||
e.previouslySentSeriesToRWMu.Unlock()
|
||||
}
|
||||
|
||||
@@ -321,7 +321,7 @@ func TestResolveDuration(t *testing.T) {
|
||||
func TestGetStaleSeries(t *testing.T) {
|
||||
ts := time.Now()
|
||||
e := &executor{
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
|
||||
t.Helper()
|
||||
@@ -414,7 +414,7 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||
f := func(curRules, newRules, expStaleRules []Rule) {
|
||||
t.Helper()
|
||||
e := &executor{
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
// seed executor with series for
|
||||
// current rules
|
||||
@@ -424,13 +424,13 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||
|
||||
e.purgeStaleSeries(newRules)
|
||||
|
||||
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
|
||||
if len(e.PreviouslySentSeriesToRW) != len(expStaleRules) {
|
||||
t.Fatalf("expected to get %d stale series, got %d",
|
||||
len(expStaleRules), len(e.previouslySentSeriesToRW))
|
||||
len(expStaleRules), len(e.PreviouslySentSeriesToRW))
|
||||
}
|
||||
|
||||
for _, exp := range expStaleRules {
|
||||
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
|
||||
if _, ok := e.PreviouslySentSeriesToRW[exp.ID()]; !ok {
|
||||
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
|
||||
}
|
||||
}
|
||||
@@ -515,7 +515,7 @@ func TestFaultyRW(t *testing.T) {
|
||||
|
||||
e := &executor{
|
||||
Rw: &remotewrite.Client{},
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
err := e.exec(context.Background(), r, time.Now(), 0, 10)
|
||||
@@ -628,7 +628,6 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
offset := 30 * time.Minute
|
||||
evalDelay := 1 * time.Minute
|
||||
disableAlign := false
|
||||
testCases := []struct {
|
||||
name string
|
||||
@@ -636,7 +635,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
originTS, expTS string
|
||||
}{
|
||||
{
|
||||
"with query align + default evalDelay",
|
||||
"with query align",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
},
|
||||
@@ -644,16 +643,16 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"without query align + default evalDelay",
|
||||
"without query align",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:10:30+00:00",
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset, find previous offset point + default evalDelay",
|
||||
"with eval_offset, find previous offset point",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
@@ -662,7 +661,7 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T10:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset + default evalDelay",
|
||||
"with eval_offset",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
@@ -670,44 +669,14 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"1h interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"1m interval with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Minute,
|
||||
},
|
||||
"2023-08-28T11:41:13+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
{
|
||||
"disable alignment with eval_delay",
|
||||
&Group{
|
||||
EvalDelay: &evalDelay,
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:40:00+00:00",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
})
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
entries: make([]StateEntry, entrySize),
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
|
||||
@@ -166,7 +166,7 @@ func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRul
|
||||
var n int
|
||||
for _, ts := range tss {
|
||||
if err := rw.Push(ts); err != nil {
|
||||
return n, fmt.Errorf("remote write failure: %w", err)
|
||||
return n, fmt.Errorf("remote write failure: %s", err)
|
||||
}
|
||||
n += len(ts.Samples)
|
||||
}
|
||||
|
||||
@@ -147,11 +147,11 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
|
||||
}
|
||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
@@ -163,11 +163,11 @@ func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||
return nil, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
||||
}
|
||||
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
|
||||
return nil, fmt.Errorf("failed to read %q param: %s", paramAlertID, err)
|
||||
}
|
||||
a, err := rh.m.alertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
|
||||
@@ -94,10 +94,6 @@ type apiGroup struct {
|
||||
NotifierHeaders []string `json:"notifier_headers,omitempty"`
|
||||
// Labels is a set of label value pairs, that will be added to every rule.
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
// EvalOffset Group will be evaluated at the exact time offset on the range of [0...evaluationInterval]
|
||||
EvalOffset float64 `json:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
EvalDelay float64 `json:"eval_delay,omitempty"`
|
||||
}
|
||||
|
||||
// groupAlerts represents a group of alerts for WEB view
|
||||
@@ -313,12 +309,6 @@ func groupToAPI(g *rule.Group) apiGroup {
|
||||
|
||||
Labels: g.Labels,
|
||||
}
|
||||
if g.EvalOffset != nil {
|
||||
ag.EvalOffset = g.EvalOffset.Seconds()
|
||||
}
|
||||
if g.EvalDelay != nil {
|
||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||
}
|
||||
ag.Rules = make([]apiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||
|
||||
@@ -32,38 +32,6 @@ Pass `-help` to `vmauth` in order to see all the supported command-line flags wi
|
||||
Feel free [contacting us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML,
|
||||
accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.com/vmgateway.html).
|
||||
|
||||
## Dropping request path prefix
|
||||
|
||||
By default `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
|
||||
Sometimes it is needed to drop path prefix before routing the request to the backend. This can be done by specifying the number of `/`-delimited
|
||||
prefix parts to drop from the request path via `drop_src_path_prefix_parts` option at `url_map` level or at `user` level.
|
||||
|
||||
For example, if you need to serve requests to [vmalert](https://docs.victoriametrics.com/vmalert.html) at `/vmalert/` path prefix,
|
||||
while serving requests to [vmagent](https://docs.victoriametrics.com/vmagent.html) at `/vmagent/` path prefix for a particular user,
|
||||
then the following [-auth.config](#auth-config) can be used:
|
||||
|
||||
```yml
|
||||
users:
|
||||
- username: foo
|
||||
url_map:
|
||||
|
||||
# proxy all the requests, which start with `/vmagent/`, to vmagent backend
|
||||
- src_paths:
|
||||
- "/vmagent/.+"
|
||||
|
||||
# drop /vmagent/ path prefix from the original request before proxying it to url_prefix.
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmagent-backend:8429/"
|
||||
|
||||
# proxy all the requests, which start with `/vmalert`, to vmalert backend
|
||||
- src_paths:
|
||||
- "/vmalert/.+"
|
||||
|
||||
# drop /vmalert/ path prefix from the original request before proxying it to url_prefix.
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmalert-backend:8880/"
|
||||
```
|
||||
|
||||
## Load balancing
|
||||
|
||||
Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
|
||||
@@ -133,31 +101,6 @@ The following [metrics](#monitoring) related to concurrency limits are exposed b
|
||||
- `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because of the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
|
||||
## Backend TLS setup
|
||||
|
||||
By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
|
||||
in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:
|
||||
|
||||
- `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
|
||||
via `tls_insecure_skip_verify` option. For example:
|
||||
|
||||
```yml
|
||||
- username: "foo"
|
||||
url_prefix: "https://localhost"
|
||||
tls_insecure_skip_verify: true
|
||||
```
|
||||
|
||||
- `-backend.tlsCAFile` allows specifying the path to TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
|
||||
The `-backend.tlsCAFile` may point either to local file or to `http` / `https` url.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
|
||||
via `tls_ca_file` option. For example:
|
||||
|
||||
```yml
|
||||
- username: "foo"
|
||||
url_prefix: "https://localhost"
|
||||
tls_ca_file: "/path/to/tls/root/ca"
|
||||
```
|
||||
|
||||
## IP filters
|
||||
|
||||
@@ -238,15 +181,6 @@ users:
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to https://localhost:8428.
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
|
||||
# TLS verification is skipped for https://localhost.
|
||||
- username: "local-single-node-with-tls"
|
||||
password: "***"
|
||||
url_prefix: "https://localhost"
|
||||
tls_insecure_skip_verify: true
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
|
||||
@@ -288,8 +222,6 @@ users:
|
||||
# For example, request to http://vmauth:8427/non/existing/path are proxied:
|
||||
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
#
|
||||
# Regular expressions are allowed in `src_paths` entries.
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
@@ -316,8 +248,6 @@ users:
|
||||
# Requests are routed in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the routed requests.
|
||||
# The requests are re-tried if url_prefix backends send 500 or 503 response status codes.
|
||||
# Note that the unauthorized_user section takes precedence when processing a route without credentials,
|
||||
# even if such a route also exists in the users section (see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236).
|
||||
unauthorized_user:
|
||||
url_prefix:
|
||||
- http://vmselect-az1/?deny_partial_response=1
|
||||
@@ -478,12 +408,6 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
@@ -15,14 +14,13 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -40,25 +38,20 @@ type AuthConfig struct {
|
||||
|
||||
// UserInfo is user information read from authConfigPath
|
||||
type UserInfo struct {
|
||||
Name string `yaml:"name,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
URLMaps []URLMap `yaml:"url_map,omitempty"`
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
|
||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
TLSInsecureSkipVerify *bool `yaml:"tls_insecure_skip_verify,omitempty"`
|
||||
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
||||
Name string `yaml:"name,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
URLMaps []URLMap `yaml:"url_map,omitempty"`
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
|
||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitReached *metrics.Counter
|
||||
|
||||
httpTransport *http.Transport
|
||||
|
||||
requests *metrics.Counter
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
@@ -120,11 +113,10 @@ func (h *Header) MarshalYAML() (interface{}, error) {
|
||||
|
||||
// URLMap is a mapping from source paths to target urls.
|
||||
type URLMap struct {
|
||||
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
SrcPaths []*SrcPath `yaml:"src_paths,omitempty"`
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
HeadersConf HeadersConf `yaml:",inline"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
}
|
||||
|
||||
// SrcPath represents an src path
|
||||
@@ -428,18 +420,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
}
|
||||
ui := ac.UnauthorizedUser
|
||||
if ui != nil {
|
||||
if ui.Username != "" {
|
||||
return nil, fmt.Errorf("field username can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.Password != "" {
|
||||
return nil, fmt.Errorf("field password can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.BearerToken != "" {
|
||||
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.Name != "" {
|
||||
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
@@ -450,11 +430,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
|
||||
}
|
||||
ui.httpTransport = tr
|
||||
}
|
||||
return &ac, nil
|
||||
}
|
||||
@@ -525,12 +500,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
|
||||
}
|
||||
ui.httpTransport = tr
|
||||
|
||||
byAuthToken[at1] = ui
|
||||
byAuthToken[at2] = ui
|
||||
}
|
||||
|
||||
@@ -221,26 +221,22 @@ func TestParseAuthConfigSuccess(t *testing.T) {
|
||||
}
|
||||
|
||||
// Single user
|
||||
insecureSkipVerifyTrue := true
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://aaa:343/bbb
|
||||
max_concurrent_requests: 5
|
||||
tls_insecure_skip_verify: true
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("", "foo", "bar"): {
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
MaxConcurrentRequests: 5,
|
||||
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
|
||||
},
|
||||
})
|
||||
|
||||
// Multiple url_prefix entries
|
||||
insecureSkipVerifyFalse := false
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
@@ -248,9 +244,6 @@ users:
|
||||
url_prefix:
|
||||
- http://node1:343/bbb
|
||||
- http://node2:343/bbb
|
||||
tls_insecure_skip_verify: false
|
||||
retry_status_codes: [500, 501]
|
||||
drop_src_path_prefix_parts: 1
|
||||
`, map[string]*UserInfo{
|
||||
getAuthToken("", "foo", "bar"): {
|
||||
Username: "foo",
|
||||
@@ -259,9 +252,6 @@ users:
|
||||
"http://node1:343/bbb",
|
||||
"http://node2:343/bbb",
|
||||
}),
|
||||
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
|
||||
RetryStatusCodes: []int{500, 501},
|
||||
DropSrcPathPrefixParts: 1,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -458,47 +448,6 @@ users:
|
||||
|
||||
}
|
||||
|
||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||
c := `
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: https://aaa/bbb
|
||||
max_concurrent_requests: 5
|
||||
tls_insecure_skip_verify: true
|
||||
|
||||
unauthorized_user:
|
||||
url_prefix: http://aaa:343/bbb
|
||||
max_concurrent_requests: 5
|
||||
tls_insecure_skip_verify: false
|
||||
`
|
||||
|
||||
ac, err := parseAuthConfig([]byte(c))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
m, err := parseAuthConfigUsers(ac)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
ui := m[getAuthToken("", "foo", "bar")]
|
||||
if !isSetBool(ui.TLSInsecureSkipVerify, true) || !ui.httpTransport.TLSClientConfig.InsecureSkipVerify {
|
||||
t.Fatalf("unexpected TLSInsecureSkipVerify value for user foo")
|
||||
}
|
||||
|
||||
if !isSetBool(ac.UnauthorizedUser.TLSInsecureSkipVerify, false) || ac.UnauthorizedUser.httpTransport.TLSClientConfig.InsecureSkipVerify {
|
||||
t.Fatalf("unexpected TLSInsecureSkipVerify value for unauthorized_user")
|
||||
}
|
||||
}
|
||||
|
||||
func isSetBool(boolP *bool, expectedValue bool) bool {
|
||||
if boolP == nil {
|
||||
return false
|
||||
}
|
||||
return *boolP == expectedValue
|
||||
}
|
||||
|
||||
func getSrcPaths(paths []string) []*SrcPath {
|
||||
var sps []*SrcPath
|
||||
for _, path := range paths {
|
||||
|
||||
@@ -42,15 +42,6 @@ users:
|
||||
password: "***"
|
||||
url_prefix: "http://localhost:8428?extra_label=team=dev"
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are proxied to https://localhost:8428
|
||||
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
|
||||
# TLS verification is ignored for https://localhost.
|
||||
- username: "local-single-node-with-tls"
|
||||
password: "***"
|
||||
url_prefix: "https://localhost"
|
||||
tls_insecure_skip_verify: true
|
||||
|
||||
# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
|
||||
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus
|
||||
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
|
||||
@@ -91,8 +82,6 @@ users:
|
||||
# For example, request to http://vmauth:8427/non/existing/path are proxied:
|
||||
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
#
|
||||
# Regular expressions are allowed in `src_paths` entries.
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
|
||||
@@ -20,8 +20,6 @@ users:
|
||||
# For example, request to http://vmauth:8427/non/existing/path are proxied:
|
||||
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
# - or http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
|
||||
#
|
||||
# Regular expressions are allowed in `src_paths` entries.
|
||||
- username: "foobar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
|
||||
@@ -2,8 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
@@ -17,19 +15,16 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -51,10 +46,6 @@ var (
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
|
||||
"Bigger values may require more memory")
|
||||
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
|
||||
"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
|
||||
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
|
||||
"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -164,19 +155,11 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
|
||||
isDefault := false
|
||||
if up == nil {
|
||||
missingRouteRequests.Inc()
|
||||
if ui.DefaultURL == nil {
|
||||
// Authorization should be requested for http requests without credentials
|
||||
// to a route that is not in the configuration for unauthorized user.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236
|
||||
if ui.BearerToken == "" && ui.Username == "" && len(*authUsers.Load()) > 0 {
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
|
||||
http.Error(w, "missing `Authorization` request header", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
missingRouteRequests.Inc()
|
||||
httpserver.Errorf(w, r, "missing route for %q", u.String())
|
||||
return
|
||||
}
|
||||
@@ -198,9 +181,9 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
query.Set("request_path", u.Path)
|
||||
targetURL.RawQuery = query.Encode()
|
||||
} else { // Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts)
|
||||
targetURL = mergeURLs(targetURL, u)
|
||||
}
|
||||
ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport)
|
||||
ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes)
|
||||
bu.put()
|
||||
if ok {
|
||||
return
|
||||
@@ -214,12 +197,12 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, transport *http.Transport) bool {
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int) bool {
|
||||
// This code has been copied from net/http/httputil/reverseproxy.go
|
||||
req := sanitizeRequestHeaders(r)
|
||||
req.URL = targetURL
|
||||
req.Host = targetURL.Host
|
||||
updateHeadersByConfig(req.Header, hc.RequestHeaders)
|
||||
transportOnce.Do(transportInit)
|
||||
res, err := transport.RoundTrip(req)
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
if err != nil {
|
||||
@@ -362,77 +345,23 @@ var (
|
||||
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
|
||||
)
|
||||
|
||||
func getTransport(insecureSkipVerifyP *bool, caFile string) (*http.Transport, error) {
|
||||
if insecureSkipVerifyP == nil {
|
||||
insecureSkipVerifyP = backendTLSInsecureSkipVerify
|
||||
}
|
||||
insecureSkipVerify := *insecureSkipVerifyP
|
||||
if caFile == "" {
|
||||
caFile = *backendTLSCAFile
|
||||
}
|
||||
var (
|
||||
transport *http.Transport
|
||||
transportOnce sync.Once
|
||||
)
|
||||
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = appendTransportKey(bb.B[:0], insecureSkipVerify, caFile)
|
||||
|
||||
transportMapLock.Lock()
|
||||
defer transportMapLock.Unlock()
|
||||
|
||||
tr := transportMap[string(bb.B)]
|
||||
if tr == nil {
|
||||
trLocal, err := newTransport(insecureSkipVerify, caFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
transportMap[string(bb.B)] = trLocal
|
||||
tr = trLocal
|
||||
}
|
||||
|
||||
return tr, nil
|
||||
}
|
||||
|
||||
var transportMap = make(map[string]*http.Transport)
|
||||
var transportMapLock sync.Mutex
|
||||
|
||||
func appendTransportKey(dst []byte, insecureSkipVerify bool, caFile string) []byte {
|
||||
dst = encoding.MarshalBool(dst, insecureSkipVerify)
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(caFile))
|
||||
return dst
|
||||
}
|
||||
|
||||
var bbPool bytesutil.ByteBufferPool
|
||||
|
||||
func newTransport(insecureSkipVerify bool, caFile string) (*http.Transport, error) {
|
||||
func transportInit() {
|
||||
tr := http.DefaultTransport.(*http.Transport).Clone()
|
||||
tr.ResponseHeaderTimeout = *responseTimeout
|
||||
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
|
||||
tr.DisableCompression = true
|
||||
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
|
||||
tr.ForceAttemptHTTP2 = false
|
||||
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
|
||||
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
|
||||
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
|
||||
}
|
||||
tlsCfg := tr.TLSClientConfig
|
||||
if tlsCfg == nil {
|
||||
tlsCfg = &tls.Config{}
|
||||
tr.TLSClientConfig = tlsCfg
|
||||
}
|
||||
if insecureSkipVerify || caFile != "" {
|
||||
tlsCfg.ClientSessionCache = tls.NewLRUClientSessionCache(0)
|
||||
tlsCfg.InsecureSkipVerify = insecureSkipVerify
|
||||
if caFile != "" {
|
||||
data, err := fs.ReadFileOrHTTP(caFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read tls_ca_file: %w", err)
|
||||
}
|
||||
rootCA := x509.NewCertPool()
|
||||
if !rootCA.AppendCertsFromPEM(data) {
|
||||
return nil, fmt.Errorf("cannot parse data read from tls_ca_file %q", caFile)
|
||||
}
|
||||
tlsCfg.RootCAs = rootCA
|
||||
}
|
||||
}
|
||||
return tr, nil
|
||||
transport = tr
|
||||
}
|
||||
|
||||
var (
|
||||
|
||||
@@ -6,13 +6,12 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL {
|
||||
func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
|
||||
targetURL := *uiURL
|
||||
srcPath := dropPrefixParts(requestURI.Path, dropSrcPathPrefixParts)
|
||||
if strings.HasPrefix(srcPath, "/") {
|
||||
if strings.HasPrefix(requestURI.Path, "/") {
|
||||
targetURL.Path = strings.TrimSuffix(targetURL.Path, "/")
|
||||
}
|
||||
targetURL.Path += srcPath
|
||||
targetURL.Path += requestURI.Path
|
||||
requestParams := requestURI.Query()
|
||||
// fast path
|
||||
if len(requestParams) == 0 {
|
||||
@@ -33,34 +32,18 @@ func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL
|
||||
return &targetURL
|
||||
}
|
||||
|
||||
func dropPrefixParts(path string, parts int) string {
|
||||
if parts <= 0 {
|
||||
return path
|
||||
}
|
||||
for parts > 0 {
|
||||
path = strings.TrimPrefix(path, "/")
|
||||
n := strings.IndexByte(path, '/')
|
||||
if n < 0 {
|
||||
return ""
|
||||
}
|
||||
path = path[n:]
|
||||
parts--
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) {
|
||||
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int) {
|
||||
for _, e := range ui.URLMaps {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts
|
||||
return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes
|
||||
}
|
||||
}
|
||||
}
|
||||
if ui.URLPrefix != nil {
|
||||
return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts
|
||||
return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes
|
||||
}
|
||||
return nil, HeadersConf{}, nil, 0
|
||||
return nil, HeadersConf{}, nil
|
||||
}
|
||||
|
||||
func normalizeURL(uOrig *url.URL) *url.URL {
|
||||
|
||||
@@ -7,91 +7,20 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDropPrefixParts(t *testing.T) {
|
||||
f := func(path string, parts int, expectedResult string) {
|
||||
t.Helper()
|
||||
|
||||
result := dropPrefixParts(path, parts)
|
||||
if result != expectedResult {
|
||||
t.Fatalf("unexpected result; got %q; want %q", result, expectedResult)
|
||||
}
|
||||
}
|
||||
|
||||
f("", 0, "")
|
||||
f("", 1, "")
|
||||
f("", 10, "")
|
||||
f("foo", 0, "foo")
|
||||
f("foo", -1, "foo")
|
||||
f("foo", 1, "")
|
||||
|
||||
f("/foo", 0, "/foo")
|
||||
f("/foo/bar", 0, "/foo/bar")
|
||||
f("/foo/bar/baz", 0, "/foo/bar/baz")
|
||||
|
||||
f("foo", 0, "foo")
|
||||
f("foo/bar", 0, "foo/bar")
|
||||
f("foo/bar/baz", 0, "foo/bar/baz")
|
||||
|
||||
f("/foo/", 0, "/foo/")
|
||||
f("/foo/bar/", 0, "/foo/bar/")
|
||||
f("/foo/bar/baz/", 0, "/foo/bar/baz/")
|
||||
|
||||
f("/foo", 1, "")
|
||||
f("/foo/bar", 1, "/bar")
|
||||
f("/foo/bar/baz", 1, "/bar/baz")
|
||||
|
||||
f("foo", 1, "")
|
||||
f("foo/bar", 1, "/bar")
|
||||
f("foo/bar/baz", 1, "/bar/baz")
|
||||
|
||||
f("/foo/", 1, "/")
|
||||
f("/foo/bar/", 1, "/bar/")
|
||||
f("/foo/bar/baz/", 1, "/bar/baz/")
|
||||
|
||||
f("/foo", 2, "")
|
||||
f("/foo/bar", 2, "")
|
||||
f("/foo/bar/baz", 2, "/baz")
|
||||
|
||||
f("foo", 2, "")
|
||||
f("foo/bar", 2, "")
|
||||
f("foo/bar/baz", 2, "/baz")
|
||||
|
||||
f("/foo/", 2, "")
|
||||
f("/foo/bar/", 2, "/")
|
||||
f("/foo/bar/baz/", 2, "/baz/")
|
||||
|
||||
f("/foo", 3, "")
|
||||
f("/foo/bar", 3, "")
|
||||
f("/foo/bar/baz", 3, "")
|
||||
|
||||
f("foo", 3, "")
|
||||
f("foo/bar", 3, "")
|
||||
f("foo/bar/baz", 3, "")
|
||||
|
||||
f("/foo/", 3, "")
|
||||
f("/foo/bar/", 3, "")
|
||||
f("/foo/bar/baz/", 3, "/")
|
||||
|
||||
f("/foo/", 4, "")
|
||||
f("/foo/bar/", 4, "")
|
||||
f("/foo/bar/baz/", 4, "")
|
||||
}
|
||||
|
||||
func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string,
|
||||
expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) {
|
||||
f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string, expectedRetryStatusCodes []int) {
|
||||
t.Helper()
|
||||
u, err := url.Parse(requestURI)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
u = normalizeURL(u)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
|
||||
if up == nil {
|
||||
t.Fatalf("cannot determie backend: %s", err)
|
||||
}
|
||||
bu := up.getLeastLoadedBackendURL()
|
||||
target := mergeURLs(bu.url, u, dropSrcPathPrefixParts)
|
||||
target := mergeURLs(bu.url, u)
|
||||
bu.put()
|
||||
if target.String() != expectedTarget {
|
||||
t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
|
||||
@@ -103,14 +32,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) {
|
||||
t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes)
|
||||
}
|
||||
if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
|
||||
t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
|
||||
}
|
||||
}
|
||||
// Simple routing with `url_prefix`
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "", "http://foo.bar/.", "[]", "[]", nil, 0)
|
||||
}, "", "http://foo.bar/.", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
HeadersConf: HeadersConf{
|
||||
@@ -119,30 +45,29 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
Value: "aaa",
|
||||
}},
|
||||
},
|
||||
RetryStatusCodes: []int{503, 501},
|
||||
DropSrcPathPrefixParts: 2,
|
||||
}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2)
|
||||
RetryStatusCodes: []int{503, 501},
|
||||
}, "/", "http://foo.bar", `[{"bb" "aaa"}]`, `[]`, []int{503, 501})
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/federate"),
|
||||
}, "/", "http://foo.bar/federate", "[]", "[]", nil, 0)
|
||||
}, "/", "http://foo.bar/federate", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0)
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0)
|
||||
}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0)
|
||||
}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0)
|
||||
}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil)
|
||||
|
||||
// Complex routing with `url_map`
|
||||
ui := &UserInfo{
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/vmsingle/api/v1/query"}),
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
|
||||
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
|
||||
HeadersConf: HeadersConf{
|
||||
RequestHeaders: []Header{
|
||||
@@ -162,8 +87,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{503, 500, 501},
|
||||
DropSrcPathPrefixParts: 1,
|
||||
RetryStatusCodes: []int{503, 500, 501},
|
||||
},
|
||||
{
|
||||
SrcPaths: getSrcPaths([]string{"/api/v1/write"}),
|
||||
@@ -181,12 +105,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
Value: "y",
|
||||
}},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: 2,
|
||||
RetryStatusCodes: []int{502},
|
||||
}
|
||||
f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
|
||||
f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2)
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501})
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil)
|
||||
f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502})
|
||||
|
||||
// Complex routing regexp paths in `url_map`
|
||||
ui = &UserInfo{
|
||||
@@ -202,17 +125,17 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
URLPrefix: mustParseURL("http://default-server"),
|
||||
}
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0)
|
||||
f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil)
|
||||
f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil)
|
||||
f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil)
|
||||
f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil)
|
||||
f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
|
||||
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0)
|
||||
}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
|
||||
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0)
|
||||
}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil)
|
||||
}
|
||||
|
||||
func TestCreateTargetURLFailure(t *testing.T) {
|
||||
@@ -223,7 +146,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
||||
t.Fatalf("cannot parse %q: %s", requestURI, err)
|
||||
}
|
||||
u = normalizeURL(u)
|
||||
up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
|
||||
up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u)
|
||||
if up != nil {
|
||||
t.Fatalf("unexpected non-empty up=%#v", up)
|
||||
}
|
||||
@@ -236,9 +159,6 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
||||
if retryStatusCodes != nil {
|
||||
t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes)
|
||||
}
|
||||
if dropSrcPathPrefixParts != 0 {
|
||||
t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts)
|
||||
}
|
||||
}
|
||||
f(&UserInfo{}, "/foo/bar")
|
||||
f(&UserInfo{
|
||||
|
||||
@@ -316,12 +316,6 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
|
||||
@@ -168,7 +168,7 @@ See the docs at https://docs.victoriametrics.com/vmbackup.html .
|
||||
|
||||
func newSrcFS() (*fslocal.FS, error) {
|
||||
if err := snapshot.Validate(*snapshotName); err != nil {
|
||||
return nil, fmt.Errorf("invalid -snapshotName=%q: %w", *snapshotName, err)
|
||||
return nil, fmt.Errorf("invalid -snapshotName=%q: %s", *snapshotName, err)
|
||||
}
|
||||
snapshotPath := filepath.Join(*storageDataPath, "snapshots", *snapshotName)
|
||||
|
||||
|
||||
@@ -450,12 +450,6 @@ command-line flags:
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//go:build aix || linux || solaris || zos
|
||||
// +build aix linux solaris zos
|
||||
|
||||
package terminal
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//go:build darwin || freebsd || openbsd
|
||||
// +build darwin freebsd openbsd
|
||||
|
||||
package terminal
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
package terminal
|
||||
|
||||
|
||||
@@ -353,12 +353,6 @@ The shortlist of configuration flags include the following:
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
|
||||
@@ -211,3 +211,7 @@ func pushAggregateSeries(tss []prompbmarshal.TimeSeries) {
|
||||
logger.Errorf("cannot flush aggregate series: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func GetAggregators() map[string]*streamaggr.Aggregators {
|
||||
return map[string]*streamaggr.Aggregators{"default": sasGlobal.Load()}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"embed"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
@@ -326,6 +327,9 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/stream-agg":
|
||||
streamaggr.WriteHumanReadableState(w, r, vminsertCommon.GetAggregators())
|
||||
return true
|
||||
case "/ready":
|
||||
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
|
||||
errMsg := fmt.Sprintf("waiting for scrape config to init targets, configs left: %d", rdy)
|
||||
|
||||
@@ -113,12 +113,6 @@ i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/q
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
|
||||
@@ -81,7 +81,7 @@ var funcs = func() map[string]*funcInfo {
|
||||
var m map[string]*funcInfo
|
||||
if err := json.Unmarshal(funcsJSON, &m); err != nil {
|
||||
// Do not use logger.Panicf, since it isn't ready yet.
|
||||
panic(fmt.Errorf("cannot parse funcsJSON: %w", err))
|
||||
panic(fmt.Errorf("cannot parse funcsJSON: %s", err))
|
||||
}
|
||||
return m
|
||||
}()
|
||||
|
||||
@@ -197,7 +197,7 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
||||
return err
|
||||
}
|
||||
if err := b.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal block during export: %w", err)
|
||||
return fmt.Errorf("cannot unmarshal block during export: %s", err)
|
||||
}
|
||||
xb := exportBlockPool.Get().(*exportBlock)
|
||||
xb.mn = mn
|
||||
@@ -407,7 +407,7 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, cp *commonPara
|
||||
return err
|
||||
}
|
||||
if err := b.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal block during export: %w", err)
|
||||
return fmt.Errorf("cannot unmarshal block during export: %s", err)
|
||||
}
|
||||
xb := exportBlockPool.Get().(*exportBlock)
|
||||
xb.mn = mn
|
||||
|
||||
@@ -29,12 +29,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
|
||||
]
|
||||
},
|
||||
"stats":{
|
||||
{% code
|
||||
// seriesFetched is string instead of int because of historical reasons.
|
||||
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
|
||||
%}
|
||||
"seriesFetched": "{%dl qs.SeriesFetched %}",
|
||||
"executionTimeMsec": {%dl qs.ExecutionTimeMsec %}
|
||||
"seriesFetched": "{%d qs.SeriesFetched %}"
|
||||
}
|
||||
{% code
|
||||
qt.Printf("generate /api/v1/query_range response for series=%d, points=%d", seriesCount, pointsCount)
|
||||
|
||||
@@ -60,95 +60,85 @@ func StreamQueryRangeResponse(qw422016 *qt422016.Writer, rs []netstorage.Result,
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:28
|
||||
}
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:28
|
||||
qw422016.N().S(`]},"stats":{`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:33
|
||||
// seriesFetched is string instead of int because of historical reasons.
|
||||
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
|
||||
|
||||
qw422016.N().S(`]},"stats":{"seriesFetched": "`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:32
|
||||
qw422016.N().D(qs.SeriesFetched)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:32
|
||||
qw422016.N().S(`"}`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:35
|
||||
qw422016.N().S(`"seriesFetched": "`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:36
|
||||
qw422016.N().DL(qs.SeriesFetched)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:36
|
||||
qw422016.N().S(`","executionTimeMsec":`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:37
|
||||
qw422016.N().DL(qs.ExecutionTimeMsec)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:37
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qt.Printf("generate /api/v1/query_range response for series=%d, points=%d", seriesCount, pointsCount)
|
||||
qtDone()
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:43
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:38
|
||||
streamdumpQueryTrace(qw422016, qt)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:43
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:38
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
func WriteQueryRangeResponse(qq422016 qtio422016.Writer, rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) {
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
StreamQueryRangeResponse(qw422016, rs, qt, qtDone, qs)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
func QueryRangeResponse(rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) string {
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
WriteQueryRangeResponse(qb422016, rs, qt, qtDone, qs)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:40
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:42
|
||||
func streamqueryRangeLine(qw422016 *qt422016.Writer, r *netstorage.Result) {
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:42
|
||||
qw422016.N().S(`{"metric":`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:49
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:44
|
||||
streammetricNameObject(qw422016, &r.MetricName)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:49
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:44
|
||||
qw422016.N().S(`,"values":`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:50
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
streamvaluesWithTimestamps(qw422016, r.Values, r.Timestamps)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:50
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:45
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
func writequeryRangeLine(qq422016 qtio422016.Writer, r *netstorage.Result) {
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
streamqueryRangeLine(qw422016, r)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
func queryRangeLine(r *netstorage.Result) string {
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
writequeryRangeLine(qb422016, r)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:52
|
||||
//line app/vmselect/prometheus/query_range_response.qtpl:47
|
||||
}
|
||||
|
||||
@@ -31,12 +31,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
|
||||
]
|
||||
},
|
||||
"stats":{
|
||||
{% code
|
||||
// seriesFetched is string instead of int because of historical reasons.
|
||||
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
|
||||
%}
|
||||
"seriesFetched": "{%dl qs.SeriesFetched %}",
|
||||
"executionTimeMsec": {%dl qs.ExecutionTimeMsec %}
|
||||
"seriesFetched": "{%d qs.SeriesFetched %}"
|
||||
}
|
||||
{% code
|
||||
qt.Printf("generate /api/v1/query response for series=%d", seriesCount)
|
||||
|
||||
@@ -70,54 +70,44 @@ func StreamQueryResponse(qw422016 *qt422016.Writer, rs []netstorage.Result, qt *
|
||||
//line app/vmselect/prometheus/query_response.qtpl:30
|
||||
}
|
||||
//line app/vmselect/prometheus/query_response.qtpl:30
|
||||
qw422016.N().S(`]},"stats":{`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:35
|
||||
// seriesFetched is string instead of int because of historical reasons.
|
||||
// It cannot be converted to int without breaking backwards compatibility at vmalert :(
|
||||
|
||||
qw422016.N().S(`]},"stats":{"seriesFetched": "`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:34
|
||||
qw422016.N().D(qs.SeriesFetched)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:34
|
||||
qw422016.N().S(`"}`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:37
|
||||
qw422016.N().S(`"seriesFetched": "`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:38
|
||||
qw422016.N().DL(qs.SeriesFetched)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:38
|
||||
qw422016.N().S(`","executionTimeMsec":`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:39
|
||||
qw422016.N().DL(qs.ExecutionTimeMsec)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:39
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qt.Printf("generate /api/v1/query response for series=%d", seriesCount)
|
||||
qtDone()
|
||||
|
||||
//line app/vmselect/prometheus/query_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_response.qtpl:40
|
||||
streamdumpQueryTrace(qw422016, qt)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:45
|
||||
//line app/vmselect/prometheus/query_response.qtpl:40
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
func WriteQueryResponse(qq422016 qtio422016.Writer, rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) {
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
StreamQueryResponse(qw422016, rs, qt, qtDone, qs)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
func QueryResponse(rs []netstorage.Result, qt *querytracer.Tracer, qtDone func(), qs *promql.QueryStats) string {
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
WriteQueryResponse(qb422016, rs, qt, qtDone, qs)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/query_response.qtpl:47
|
||||
//line app/vmselect/prometheus/query_response.qtpl:42
|
||||
}
|
||||
|
||||
@@ -38,7 +38,6 @@ var aggrFuncs = map[string]aggrFunc{
|
||||
"median": aggrFuncMedian,
|
||||
"min": newAggrFunc(aggrFuncMin),
|
||||
"mode": newAggrFunc(aggrFuncMode),
|
||||
"outliers_iqr": aggrFuncOutliersIQR,
|
||||
"outliers_mad": aggrFuncOutliersMAD,
|
||||
"outliersk": aggrFuncOutliersK,
|
||||
"quantile": aggrFuncQuantile,
|
||||
@@ -945,58 +944,6 @@ func aggrFuncMAD(tss []*timeseries) []*timeseries {
|
||||
return tss[:1]
|
||||
}
|
||||
|
||||
func aggrFuncOutliersIQR(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
|
||||
// Calculate lower and upper bounds for interquartile range per each point across tss
|
||||
// according to Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
|
||||
lower, upper := getPerPointIQRBounds(tss)
|
||||
// Leave only time series with outliers above upper bound or below lower bound
|
||||
tssDst := tss[:0]
|
||||
for _, ts := range tss {
|
||||
values := ts.Values
|
||||
for i, v := range values {
|
||||
if v > upper[i] || v < lower[i] {
|
||||
tssDst = append(tssDst, ts)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return tssDst
|
||||
}
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
|
||||
func getPerPointIQRBounds(tss []*timeseries) ([]float64, []float64) {
|
||||
if len(tss) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
pointsLen := len(tss[0].Values)
|
||||
values := make([]float64, 0, len(tss))
|
||||
var qs []float64
|
||||
lower := make([]float64, pointsLen)
|
||||
upper := make([]float64, pointsLen)
|
||||
for i := 0; i < pointsLen; i++ {
|
||||
values = values[:0]
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
if !math.IsNaN(v) {
|
||||
values = append(values, v)
|
||||
}
|
||||
}
|
||||
qs := quantiles(qs[:0], iqrPhis, values)
|
||||
iqr := 1.5 * (qs[1] - qs[0])
|
||||
lower[i] = qs[0] - iqr
|
||||
upper[i] = qs[1] + iqr
|
||||
}
|
||||
return lower, upper
|
||||
}
|
||||
|
||||
var iqrPhis = []float64{0.25, 0.75}
|
||||
|
||||
func aggrFuncOutliersMAD(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
|
||||
@@ -79,13 +79,6 @@ type incrementalAggrFuncContext struct {
|
||||
callbacks *incrementalAggrFuncCallbacks
|
||||
}
|
||||
|
||||
func (iafc *incrementalAggrFuncContext) resetState() {
|
||||
byWorkerID := iafc.byWorkerID
|
||||
for i := range byWorkerID {
|
||||
byWorkerID[i].m = make(map[string]*incrementalAggrContext, len(byWorkerID[i].m))
|
||||
}
|
||||
}
|
||||
|
||||
func newIncrementalAggrFuncContext(ae *metricsql.AggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
|
||||
return &incrementalAggrFuncContext{
|
||||
ae: ae,
|
||||
@@ -161,8 +154,6 @@ func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
|
||||
finalizeAggrFunc(iac)
|
||||
tss = append(tss, iac.ts)
|
||||
}
|
||||
// reset iafc state, so it could be re-used
|
||||
iafc.resetState()
|
||||
return tss
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
@@ -17,13 +16,11 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
@@ -36,14 +33,11 @@ var (
|
||||
"Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated "+
|
||||
"as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . "+
|
||||
"See also -search.logQueryMemoryUsage")
|
||||
logQueryMemoryUsage = flagutil.NewBytes("search.logQueryMemoryUsage", 0, "Log query and increment vm_memory_intensive_queries_total metric each time "+
|
||||
"the query requires more memory than specified by this flag. "+
|
||||
logQueryMemoryUsage = flagutil.NewBytes("search.logQueryMemoryUsage", 0, "Log queries, which require more memory than specified by this flag. "+
|
||||
"This may help detecting and optimizing heavy queries. Query logging is disabled by default. "+
|
||||
"See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery")
|
||||
noStaleMarkers = flag.Bool("search.noStaleMarkers", false, "Set this flag to true if the database doesn't contain Prometheus stale markers, "+
|
||||
"so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets")
|
||||
minWindowForInstantRollupOptimization = flagutil.NewDuration("search.minWindowForInstantRollupOptimization", "3h", "Enable cache-based optimization for repeated queries "+
|
||||
"to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value")
|
||||
)
|
||||
|
||||
// The minimum number of points per timeseries for enabling time rounding.
|
||||
@@ -66,7 +60,7 @@ func ValidateMaxPointsPerSeries(start, end, step int64, maxPoints int) error {
|
||||
|
||||
// AdjustStartEnd adjusts start and end values, so response caching may be enabled.
|
||||
//
|
||||
// See EvalConfig.mayCache() for details.
|
||||
// See EvalConfig.mayCache for details.
|
||||
func AdjustStartEnd(start, end, step int64) (int64, int64) {
|
||||
if *disableCache {
|
||||
// Do not adjust start and end values when cache is disabled.
|
||||
@@ -140,7 +134,8 @@ type EvalConfig struct {
|
||||
|
||||
// QueryStats contains various stats for the currently executed query.
|
||||
//
|
||||
// The caller must initialize QueryStats, otherwise it isn't collected.
|
||||
// The caller must initialize the QueryStats if it needs the stats.
|
||||
// Otherwise the stats isn't collected.
|
||||
QueryStats *QueryStats
|
||||
|
||||
timestamps []int64
|
||||
@@ -170,24 +165,13 @@ func copyEvalConfig(src *EvalConfig) *EvalConfig {
|
||||
// QueryStats contains various stats for the query.
|
||||
type QueryStats struct {
|
||||
// SeriesFetched contains the number of series fetched from storage during the query evaluation.
|
||||
SeriesFetched int64
|
||||
// ExecutionTimeMsec contains the number of milliseconds the query took to execute.
|
||||
ExecutionTimeMsec int64
|
||||
SeriesFetched int
|
||||
}
|
||||
|
||||
func (qs *QueryStats) addSeriesFetched(n int) {
|
||||
if qs == nil {
|
||||
return
|
||||
if qs != nil {
|
||||
qs.SeriesFetched += n
|
||||
}
|
||||
atomic.AddInt64(&qs.SeriesFetched, int64(n))
|
||||
}
|
||||
|
||||
func (qs *QueryStats) addExecutionTimeMsec(startTime time.Time) {
|
||||
if qs == nil {
|
||||
return
|
||||
}
|
||||
d := time.Since(startTime).Milliseconds()
|
||||
atomic.AddInt64(&qs.ExecutionTimeMsec, d)
|
||||
}
|
||||
|
||||
func (ec *EvalConfig) validate() {
|
||||
@@ -206,11 +190,6 @@ func (ec *EvalConfig) mayCache() bool {
|
||||
if !ec.MayCache {
|
||||
return false
|
||||
}
|
||||
if ec.Start == ec.End {
|
||||
// There is no need in aligning start and end to step for instant query
|
||||
// in order to cache its results.
|
||||
return true
|
||||
}
|
||||
if ec.Start%ec.Step != 0 {
|
||||
return false
|
||||
}
|
||||
@@ -260,7 +239,7 @@ func getTimestamps(start, end, step int64, maxPointsPerSeries int) []int64 {
|
||||
func evalExpr(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
|
||||
if qt.Enabled() {
|
||||
query := string(e.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
query = bytesutil.LimitStringLen(query, 300)
|
||||
mayCache := ec.mayCache()
|
||||
qt = qt.NewChild("eval: query=%s, timeRange=%s, step=%d, mayCache=%v", query, ec.timeRangeString(), ec.Step, mayCache)
|
||||
}
|
||||
@@ -1059,595 +1038,46 @@ func removeNanValues(dstValues []float64, dstTimestamps []int64, values []float6
|
||||
return dstValues, dstTimestamps
|
||||
}
|
||||
|
||||
// evalInstantRollup evaluates instant rollup where ec.Start == ec.End.
|
||||
func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
|
||||
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window int64) ([]*timeseries, error) {
|
||||
if ec.Start != ec.End {
|
||||
logger.Panicf("BUG: evalInstantRollup cannot be called on non-empty time range; got %s", ec.timeRangeString())
|
||||
}
|
||||
timestamp := ec.Start
|
||||
if qt.Enabled() {
|
||||
qt = qt.NewChild("instant rollup %s; time=%s, window=%d", expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
|
||||
defer qt.Done()
|
||||
}
|
||||
|
||||
evalAt := func(qt *querytracer.Tracer, timestamp, window int64) ([]*timeseries, error) {
|
||||
ecCopy := copyEvalConfig(ec)
|
||||
ecCopy.Start = timestamp
|
||||
ecCopy.End = timestamp
|
||||
pointsPerSeries := int64(1)
|
||||
return evalRollupFuncNoCache(qt, ecCopy, funcName, rf, expr, me, iafc, window, pointsPerSeries)
|
||||
}
|
||||
tooBigOffset := func(offset int64) bool {
|
||||
maxOffset := window / 2
|
||||
if maxOffset > 1800*1000 {
|
||||
maxOffset = 1800 * 1000
|
||||
}
|
||||
return offset >= maxOffset
|
||||
}
|
||||
deleteCachedSeries := func(qt *querytracer.Tracer) {
|
||||
rollupResultCacheV.DeleteInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
}
|
||||
getCachedSeries := func(qt *querytracer.Tracer) ([]*timeseries, int64, error) {
|
||||
again:
|
||||
offset := int64(0)
|
||||
tssCached := rollupResultCacheV.GetInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
ec.QueryStats.addSeriesFetched(len(tssCached))
|
||||
if len(tssCached) == 0 {
|
||||
// Cache miss. Re-populate the missing data.
|
||||
start := int64(fasttime.UnixTimestamp()*1000) - cacheTimestampOffset.Milliseconds()
|
||||
offset = timestamp - start
|
||||
if offset < 0 {
|
||||
start = timestamp
|
||||
offset = 0
|
||||
}
|
||||
if tooBigOffset(offset) {
|
||||
qt.Printf("cannot apply instant rollup optimization because the -search.cacheTimestampOffset=%s is too big "+
|
||||
"for the requested time=%s and window=%d", cacheTimestampOffset, storage.TimestampToHumanReadableFormat(timestamp), window)
|
||||
tss, err := evalAt(qt, timestamp, window)
|
||||
return tss, 0, err
|
||||
}
|
||||
qt.Printf("calculating the rollup at time=%s, because it is missing in the cache", storage.TimestampToHumanReadableFormat(start))
|
||||
tss, err := evalAt(qt, start, window)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if hasDuplicateSeries(tss) {
|
||||
qt.Printf("cannot apply instant rollup optimization because the result contains duplicate series")
|
||||
tss, err := evalAt(qt, timestamp, window)
|
||||
return tss, 0, err
|
||||
}
|
||||
rollupResultCacheV.PutInstantValues(qt, expr, window, ec.Step, ec.EnforcedTagFilterss, tss)
|
||||
return tss, offset, nil
|
||||
}
|
||||
// Cache hit. Verify whether it is OK to use the cached data.
|
||||
offset = timestamp - tssCached[0].Timestamps[0]
|
||||
if offset < 0 {
|
||||
qt.Printf("do not apply instant rollup optimization because the cached values have bigger timestamp=%s than the requested one=%s",
|
||||
storage.TimestampToHumanReadableFormat(tssCached[0].Timestamps[0]), storage.TimestampToHumanReadableFormat(timestamp))
|
||||
// Delete the outdated cached values, so the cache could be re-populated with newer values.
|
||||
deleteCachedSeries(qt)
|
||||
goto again
|
||||
}
|
||||
if tooBigOffset(offset) {
|
||||
qt.Printf("do not apply instant rollup optimization because the offset=%d between the requested timestamp "+
|
||||
"and the cached values is too big comparing to window=%d", offset, window)
|
||||
// Delete the outdated cached values, so the cache could be re-populated with newer values.
|
||||
deleteCachedSeries(qt)
|
||||
goto again
|
||||
}
|
||||
return tssCached, offset, nil
|
||||
}
|
||||
|
||||
if !ec.mayCache() {
|
||||
qt.Printf("do not apply instant rollup optimization because of disabled cache")
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
if window < minWindowForInstantRollupOptimization.Milliseconds() {
|
||||
qt.Printf("do not apply instant rollup optimization because of too small window=%d; must be equal or bigger than %d",
|
||||
window, minWindowForInstantRollupOptimization.Milliseconds())
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
switch funcName {
|
||||
case "avg_over_time":
|
||||
if iafc != nil {
|
||||
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
qt.Printf("optimized calculation for instant rollup avg_over_time(m[d]) as (sum_over_time(m[d]) / count_over_time(m[d]))")
|
||||
fe := expr.(*metricsql.FuncExpr)
|
||||
feSum := *fe
|
||||
feSum.Name = "sum_over_time"
|
||||
feCount := *fe
|
||||
feCount.Name = "count_over_time"
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
Left: &feSum,
|
||||
Right: &feCount,
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
case "rate":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "sum" {
|
||||
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
qt.Printf("optimized calculation for sum(rate(m[d])) as (sum(increase(m[d])) / d)")
|
||||
afe := expr.(*metricsql.AggrFuncExpr)
|
||||
fe := afe.Args[0].(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
re := fe.Args[0].(*metricsql.RollupExpr)
|
||||
d := re.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
afeIncrease := *afe
|
||||
afeIncrease.Args = []metricsql.Expr{&feIncrease}
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: true,
|
||||
Left: &afeIncrease,
|
||||
Right: &metricsql.NumberExpr{
|
||||
N: float64(d) / 1000,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
}
|
||||
qt.Printf("optimized calculation for instant rollup rate(m[d]) as (increase(m[d]) / d)")
|
||||
fe := expr.(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
re := fe.Args[0].(*metricsql.RollupExpr)
|
||||
d := re.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
Left: &feIncrease,
|
||||
Right: &metricsql.NumberExpr{
|
||||
N: float64(d) / 1000,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
case "max_over_time":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "max" {
|
||||
qt.Printf("do not apply instant rollup optimization for non-max incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate
|
||||
//
|
||||
// max_over_time(m[window] @ timestamp)
|
||||
//
|
||||
// as the maximum of
|
||||
//
|
||||
// - max_over_time(m[window] @ (timestamp-offset))
|
||||
// - max_over_time(m[offset] @ timestamp)
|
||||
//
|
||||
// if max_over_time(m[offset] @ (timestamp-window)) < max_over_time(m[window] @ (timestamp-offset))
|
||||
// otherwise do not apply the optimization
|
||||
//
|
||||
// where
|
||||
//
|
||||
// - max_over_time(m[window] @ (timestamp-offset)) is obtained from cache
|
||||
// - max_over_time(m[offset] @ timestamp) and max_over_time(m[offset] @ (timestamp-window)) are calculated from the storage
|
||||
// These rollups are calculated faster than max_over_time(m[window]) because offset is smaller than window.
|
||||
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
|
||||
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
|
||||
defer qtChild.Done()
|
||||
|
||||
tssCached, offset, err := getCachedSeries(qtChild)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if offset == 0 {
|
||||
return tssCached, nil
|
||||
}
|
||||
// Calculate max_over_time(m[offset] @ timestamp)
|
||||
tssStart, err := evalAt(qtChild, timestamp, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssStart) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate max_over_time(m[offset] @ (timestamp - window))
|
||||
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssEnd) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate the result
|
||||
tss, ok := getMaxInstantValues(qtChild, tssCached, tssStart, tssEnd)
|
||||
if !ok {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains bigger values than tssCached")
|
||||
deleteCachedSeries(qtChild)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
return tss, nil
|
||||
case "min_over_time":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "min" {
|
||||
qt.Printf("do not apply instant rollup optimization for non-min incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate
|
||||
//
|
||||
// min_over_time(m[window] @ timestamp)
|
||||
//
|
||||
// as the minimum of
|
||||
//
|
||||
// - min_over_time(m[window] @ (timestamp-offset))
|
||||
// - min_over_time(m[offset] @ timestamp)
|
||||
//
|
||||
// if min_over_time(m[offset] @ (timestamp-window)) > min_over_time(m[window] @ (timestamp-offset))
|
||||
// otherwise do not apply the optimization
|
||||
//
|
||||
// where
|
||||
//
|
||||
// - min_over_time(m[window] @ (timestamp-offset)) is obtained from cache
|
||||
// - min_over_time(m[offset] @ timestamp) and min_over_time(m[offset] @ (timestamp-window)) are calculated from the storage
|
||||
// These rollups are calculated faster than min_over_time(m[window]) because offset is smaller than window.
|
||||
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
|
||||
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
|
||||
defer qtChild.Done()
|
||||
|
||||
tssCached, offset, err := getCachedSeries(qtChild)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if offset == 0 {
|
||||
return tssCached, nil
|
||||
}
|
||||
// Calculate min_over_time(m[offset] @ timestamp)
|
||||
tssStart, err := evalAt(qtChild, timestamp, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssStart) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate min_over_time(m[offset] @ (timestamp - window))
|
||||
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssEnd) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate the result
|
||||
tss, ok := getMinInstantValues(qtChild, tssCached, tssStart, tssEnd)
|
||||
if !ok {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains smaller values than tssCached")
|
||||
deleteCachedSeries(qtChild)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
return tss, nil
|
||||
case
|
||||
"count_eq_over_time",
|
||||
"count_gt_over_time",
|
||||
"count_le_over_time",
|
||||
"count_ne_over_time",
|
||||
"count_over_time",
|
||||
"increase",
|
||||
"increase_pure",
|
||||
"sum_over_time":
|
||||
if iafc != nil && strings.ToLower(iafc.ae.Name) != "sum" {
|
||||
qt.Printf("do not apply instant rollup optimization for non-sum incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
|
||||
// Calculate
|
||||
//
|
||||
// rf(m[window] @ timestamp)
|
||||
//
|
||||
// as
|
||||
//
|
||||
// rf(m[window] @ (timestamp-offset)) + rf(m[offset] @ timestamp) - rf(m[offset] @ (timestamp-window))
|
||||
//
|
||||
// where
|
||||
//
|
||||
// - rf is count_over_time, sum_over_time or increase
|
||||
// - rf(m[window] @ (timestamp-offset)) is obtained from cache
|
||||
// - rf(m[offset] @ timestamp) and rf(m[offset] @ (timestamp-window)) are calculated from the storage
|
||||
// These rollups are calculated faster than rf(m[window]) because offset is smaller than window.
|
||||
qtChild := qt.NewChild("optimized calculation for instant rollup %s at time=%s with lookbehind window=%d",
|
||||
expr.AppendString(nil), storage.TimestampToHumanReadableFormat(timestamp), window)
|
||||
defer qtChild.Done()
|
||||
|
||||
tssCached, offset, err := getCachedSeries(qtChild)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if offset == 0 {
|
||||
return tssCached, nil
|
||||
}
|
||||
// Calculate rf(m[offset] @ timestamp)
|
||||
tssStart, err := evalAt(qtChild, timestamp, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssStart) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssStart contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate rf(m[offset] @ (timestamp - window))
|
||||
tssEnd, err := evalAt(qtChild, timestamp-window, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if hasDuplicateSeries(tssEnd) {
|
||||
qtChild.Printf("cannot apply instant rollup optimization, since tssEnd contains duplicate series")
|
||||
return evalAt(qtChild, timestamp, window)
|
||||
}
|
||||
// Calculate the result
|
||||
tss := getSumInstantValues(qtChild, tssCached, tssStart, tssEnd)
|
||||
return tss, nil
|
||||
default:
|
||||
qt.Printf("instant rollup optimization isn't implemented for %s()", funcName)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
}
|
||||
|
||||
func hasDuplicateSeries(tss []*timeseries) bool {
|
||||
if len(tss) <= 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
m := make(map[string]struct{}, len(tss))
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
for _, ts := range tss {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
if _, ok := m[string(bb.B)]; ok {
|
||||
return true
|
||||
}
|
||||
m[string(bb.B)] = struct{}{}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getMinInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) ([]*timeseries, bool) {
|
||||
qt = qt.NewChild("calculate the minimum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
|
||||
defer qt.Done()
|
||||
|
||||
getMin := func(a, b float64) float64 {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
tss, ok := getMinMaxInstantValues(tssCached, tssStart, tssEnd, getMin)
|
||||
qt.Printf("resulting series=%d; ok=%v", len(tss), ok)
|
||||
return tss, ok
|
||||
}
|
||||
|
||||
func getMaxInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) ([]*timeseries, bool) {
|
||||
qt = qt.NewChild("calculate the maximum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
|
||||
defer qt.Done()
|
||||
|
||||
getMax := func(a, b float64) float64 {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
tss, ok := getMinMaxInstantValues(tssCached, tssStart, tssEnd, getMax)
|
||||
qt.Printf("resulting series=%d", len(tss))
|
||||
return tss, ok
|
||||
}
|
||||
|
||||
func getMinMaxInstantValues(tssCached, tssStart, tssEnd []*timeseries, f func(a, b float64) float64) ([]*timeseries, bool) {
|
||||
assertInstantValues(tssCached)
|
||||
assertInstantValues(tssStart)
|
||||
assertInstantValues(tssEnd)
|
||||
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
m := make(map[string]*timeseries, len(tssCached))
|
||||
for _, ts := range tssCached {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
if _, ok := m[string(bb.B)]; ok {
|
||||
logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
|
||||
}
|
||||
m[string(bb.B)] = ts
|
||||
}
|
||||
|
||||
mStart := make(map[string]*timeseries, len(tssStart))
|
||||
for _, ts := range tssStart {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
if _, ok := mStart[string(bb.B)]; ok {
|
||||
logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
|
||||
}
|
||||
mStart[string(bb.B)] = ts
|
||||
tsCached := m[string(bb.B)]
|
||||
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) {
|
||||
if !math.IsNaN(ts.Values[0]) {
|
||||
tsCached.Values[0] = f(ts.Values[0], tsCached.Values[0])
|
||||
}
|
||||
} else {
|
||||
m[string(bb.B)] = ts
|
||||
}
|
||||
}
|
||||
|
||||
for _, ts := range tssEnd {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
tsCached := m[string(bb.B)]
|
||||
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) && !math.IsNaN(ts.Values[0]) {
|
||||
if ts.Values[0] == f(ts.Values[0], tsCached.Values[0]) {
|
||||
tsStart := mStart[string(bb.B)]
|
||||
if tsStart == nil || math.IsNaN(tsStart.Values[0]) || tsStart.Values[0] != f(ts.Values[0], tsStart.Values[0]) {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rvs := make([]*timeseries, 0, len(m))
|
||||
for _, ts := range m {
|
||||
rvs = append(rvs, ts)
|
||||
}
|
||||
return rvs, true
|
||||
}
|
||||
|
||||
// getSumInstantValues calculates tssCached + tssStart - tssEnd
|
||||
func getSumInstantValues(qt *querytracer.Tracer, tssCached, tssStart, tssEnd []*timeseries) []*timeseries {
|
||||
qt = qt.NewChild("calculate the sum for instant values across series; cached=%d, start=%d, end=%d", len(tssCached), len(tssStart), len(tssEnd))
|
||||
defer qt.Done()
|
||||
|
||||
assertInstantValues(tssCached)
|
||||
assertInstantValues(tssStart)
|
||||
assertInstantValues(tssEnd)
|
||||
|
||||
m := make(map[string]*timeseries, len(tssCached))
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
for _, ts := range tssCached {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
if _, ok := m[string(bb.B)]; ok {
|
||||
logger.Panicf("BUG: duplicate series found: %s", &ts.MetricName)
|
||||
}
|
||||
m[string(bb.B)] = ts
|
||||
}
|
||||
|
||||
for _, ts := range tssStart {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
tsCached := m[string(bb.B)]
|
||||
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) {
|
||||
if !math.IsNaN(ts.Values[0]) {
|
||||
tsCached.Values[0] += ts.Values[0]
|
||||
}
|
||||
} else {
|
||||
m[string(bb.B)] = ts
|
||||
}
|
||||
}
|
||||
|
||||
for _, ts := range tssEnd {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
tsCached := m[string(bb.B)]
|
||||
if tsCached != nil && !math.IsNaN(tsCached.Values[0]) {
|
||||
if !math.IsNaN(ts.Values[0]) {
|
||||
tsCached.Values[0] -= ts.Values[0]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rvs := make([]*timeseries, 0, len(m))
|
||||
for _, ts := range m {
|
||||
rvs = append(rvs, ts)
|
||||
}
|
||||
qt.Printf("resulting series=%d", len(rvs))
|
||||
return rvs
|
||||
}
|
||||
|
||||
func assertInstantValues(tss []*timeseries) {
|
||||
for _, ts := range tss {
|
||||
if len(ts.Values) != 1 {
|
||||
logger.Panicf("BUG: instant series must contain a single value; got %d values", len(ts.Values))
|
||||
}
|
||||
if len(ts.Timestamps) != 1 {
|
||||
logger.Panicf("BUG: instant series must contain a single timestamp; got %d timestamps", len(ts.Timestamps))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
rollupResultCacheFullHits = metrics.NewCounter(`vm_rollup_result_cache_full_hits_total`)
|
||||
rollupResultCachePartialHits = metrics.NewCounter(`vm_rollup_result_cache_partial_hits_total`)
|
||||
rollupResultCacheMiss = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
|
||||
|
||||
memoryIntensiveQueries = metrics.NewCounter(`vm_memory_intensive_queries_total`)
|
||||
)
|
||||
|
||||
func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
|
||||
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowExpr *metricsql.DurationExpr) ([]*timeseries, error) {
|
||||
var rollupMemorySize int64
|
||||
window, err := windowExpr.NonNegativeDuration(ec.Step)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse lookbehind window in square brackets at %s: %w", expr.AppendString(nil), err)
|
||||
}
|
||||
if qt.Enabled() {
|
||||
qt = qt.NewChild("rollup %s(): timeRange=%s, step=%d, window=%d", funcName, ec.timeRangeString(), ec.Step, window)
|
||||
defer func() {
|
||||
qt.Donef("neededMemoryBytes=%d", rollupMemorySize)
|
||||
}()
|
||||
}
|
||||
if me.IsEmpty() {
|
||||
return evalNumber(ec, nan), nil
|
||||
}
|
||||
|
||||
if ec.Start == ec.End {
|
||||
rvs, err := evalInstantRollup(qt, ec, funcName, rf, expr, me, iafc, window)
|
||||
if err != nil {
|
||||
err = &UserReadableError{
|
||||
Err: err,
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
// Search for partial results in cache.
|
||||
tssCached, start := rollupResultCacheV.GetSeries(qt, ec, expr, window)
|
||||
ec.QueryStats.addSeriesFetched(len(tssCached))
|
||||
tssCached, start := rollupResultCacheV.Get(qt, ec, expr, window)
|
||||
if start > ec.End {
|
||||
qt.Printf("the result is fully cached")
|
||||
// The result is fully cached.
|
||||
rollupResultCacheFullHits.Inc()
|
||||
return tssCached, nil
|
||||
}
|
||||
if start > ec.Start {
|
||||
qt.Printf("partial cache hit")
|
||||
rollupResultCachePartialHits.Inc()
|
||||
} else {
|
||||
qt.Printf("cache miss")
|
||||
rollupResultCacheMiss.Inc()
|
||||
}
|
||||
|
||||
ecCopy := copyEvalConfig(ec)
|
||||
ecCopy.Start = start
|
||||
pointsPerSeries := 1 + (ec.End-ec.Start)/ec.Step
|
||||
tss, err := evalRollupFuncNoCache(qt, ecCopy, funcName, rf, expr, me, iafc, window, pointsPerSeries)
|
||||
if err != nil {
|
||||
err = &UserReadableError{
|
||||
Err: err,
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
rvs, err := mergeTimeseries(qt, tssCached, tss, start, ec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot merge series: %w", err)
|
||||
}
|
||||
if tss != nil {
|
||||
rollupResultCacheV.PutSeries(qt, ec, expr, window, tss)
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
// evalRollupFuncNoCache calculates the given rf with the given lookbehind window.
|
||||
//
|
||||
// pointsPerSeries is used only for estimating the needed memory for query processing
|
||||
func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
|
||||
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window, pointsPerSeries int64) ([]*timeseries, error) {
|
||||
if qt.Enabled() {
|
||||
qt = qt.NewChild("rollup %s: timeRange=%s, step=%d, window=%d", expr.AppendString(nil), ec.timeRangeString(), ec.Step, window)
|
||||
defer qt.Done()
|
||||
}
|
||||
if window < 0 {
|
||||
return nil, nil
|
||||
}
|
||||
// Obtain rollup configs before fetching data from db, so type errors could be caught earlier.
|
||||
sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step, ec.MaxPointsPerSeries)
|
||||
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, ec.Start, ec.End, ec.Step, ec.MaxPointsPerSeries, window, ec.LookbackDelta, sharedTimestamps)
|
||||
// Obtain rollup configs before fetching data from db,
|
||||
// so type errors can be caught earlier.
|
||||
sharedTimestamps := getTimestamps(start, ec.End, ec.Step, ec.MaxPointsPerSeries)
|
||||
preFunc, rcs, err := getRollupConfigs(funcName, rf, expr, start, ec.End, ec.Step, ec.MaxPointsPerSeries, window, ec.LookbackDelta, sharedTimestamps)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1655,10 +1085,7 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
||||
// Fetch the remaining part of the result.
|
||||
tfss := searchutils.ToTagFilterss(me.LabelFilterss)
|
||||
tfss = searchutils.JoinTagFilterss(tfss, ec.EnforcedTagFilterss)
|
||||
minTimestamp := ec.Start
|
||||
if needSilenceIntervalForRollupFunc(funcName) {
|
||||
minTimestamp -= maxSilenceInterval
|
||||
}
|
||||
minTimestamp := start - maxSilenceInterval
|
||||
if window > ec.Step {
|
||||
minTimestamp -= window
|
||||
} else {
|
||||
@@ -1670,16 +1097,21 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
||||
sq := storage.NewSearchQuery(minTimestamp, ec.End, tfss, ec.MaxSeries)
|
||||
rss, err := netstorage.ProcessSearchQuery(qt, sq, ec.Deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &UserReadableError{
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
rssLen := rss.Len()
|
||||
if rssLen == 0 {
|
||||
rss.Cancel()
|
||||
return nil, nil
|
||||
tss := mergeTimeseries(tssCached, nil, start, ec)
|
||||
return tss, nil
|
||||
}
|
||||
ec.QueryStats.addSeriesFetched(rssLen)
|
||||
|
||||
// Verify timeseries fit available memory during rollup calculations.
|
||||
// Verify timeseries fit available memory after the rollup.
|
||||
// Take into account points from tssCached.
|
||||
pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
|
||||
timeseriesLen := rssLen
|
||||
if iafc != nil {
|
||||
// Incremental aggregates require holding only GOMAXPROCS timeseries in memory.
|
||||
@@ -1699,10 +1131,9 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
||||
timeseriesLen = rssLen
|
||||
}
|
||||
}
|
||||
rollupPoints := mulNoOverflow(pointsPerSeries, int64(timeseriesLen*len(rcs)))
|
||||
rollupMemorySize := sumNoOverflow(mulNoOverflow(int64(rssLen), 1000), mulNoOverflow(rollupPoints, 16))
|
||||
rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
|
||||
rollupMemorySize = sumNoOverflow(mulNoOverflow(int64(rssLen), 1000), mulNoOverflow(rollupPoints, 16))
|
||||
if maxMemory := int64(logQueryMemoryUsage.N); maxMemory > 0 && rollupMemorySize > maxMemory {
|
||||
memoryIntensiveQueries.Inc()
|
||||
requestURI := ec.GetRequestURI()
|
||||
logger.Warnf("remoteAddr=%s, requestURI=%s: the %s requires %d bytes of memory for processing; "+
|
||||
"logging this query, since it exceeds the -search.logQueryMemoryUsage=%d; "+
|
||||
@@ -1711,33 +1142,44 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
||||
}
|
||||
if maxMemory := int64(maxMemoryPerQuery.N); maxMemory > 0 && rollupMemorySize > maxMemory {
|
||||
rss.Cancel()
|
||||
err := fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series "+
|
||||
"according to -search.maxMemoryPerQuery=%d; requested memory: %d bytes; "+
|
||||
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
|
||||
"increasing -search.maxMemoryPerQuery",
|
||||
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerSeries, maxMemory, rollupMemorySize, float64(ec.Step)/1e3)
|
||||
return nil, err
|
||||
return nil, &UserReadableError{
|
||||
Err: fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series "+
|
||||
"according to -search.maxMemoryPerQuery=%d; requested memory: %d bytes; "+
|
||||
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
|
||||
"increasing -search.maxMemoryPerQuery",
|
||||
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, maxMemory, rollupMemorySize, float64(ec.Step)/1e3),
|
||||
}
|
||||
}
|
||||
rml := getRollupMemoryLimiter()
|
||||
if !rml.Get(uint64(rollupMemorySize)) {
|
||||
rss.Cancel()
|
||||
err := fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series; "+
|
||||
"total available memory for concurrent requests: %d bytes; requested memory: %d bytes; "+
|
||||
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
|
||||
"switching to node with more RAM; increasing -memory.allowedPercent",
|
||||
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerSeries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)
|
||||
return nil, err
|
||||
return nil, &UserReadableError{
|
||||
Err: fmt.Errorf("not enough memory for processing %s, which returns %d data points across %d time series with %d points in each time series; "+
|
||||
"total available memory for concurrent requests: %d bytes; "+
|
||||
"requested memory: %d bytes; "+
|
||||
"possible solutions are: reducing the number of matching time series; increasing `step` query arg (step=%gs); "+
|
||||
"switching to node with more RAM; increasing -memory.allowedPercent",
|
||||
expr.AppendString(nil), rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3),
|
||||
}
|
||||
}
|
||||
defer rml.Put(uint64(rollupMemorySize))
|
||||
qt.Printf("the rollup evaluation needs an estimated %d bytes of RAM for %d series and %d points per series (summary %d points)",
|
||||
rollupMemorySize, timeseriesLen, pointsPerSeries, rollupPoints)
|
||||
|
||||
// Evaluate rollup
|
||||
keepMetricNames := getKeepMetricNames(expr)
|
||||
var tss []*timeseries
|
||||
if iafc != nil {
|
||||
return evalRollupWithIncrementalAggregate(qt, funcName, keepMetricNames, iafc, rss, rcs, preFunc, sharedTimestamps)
|
||||
tss, err = evalRollupWithIncrementalAggregate(qt, funcName, keepMetricNames, iafc, rss, rcs, preFunc, sharedTimestamps)
|
||||
} else {
|
||||
tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
|
||||
}
|
||||
return evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
|
||||
if err != nil {
|
||||
return nil, &UserReadableError{
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
tss = mergeTimeseries(tssCached, tss, start, ec)
|
||||
rollupResultCacheV.Put(qt, ec, expr, window, tss)
|
||||
return tss, nil
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -1752,53 +1194,6 @@ func getRollupMemoryLimiter() *memoryLimiter {
|
||||
return &rollupMemoryLimiter
|
||||
}
|
||||
|
||||
func needSilenceIntervalForRollupFunc(funcName string) bool {
|
||||
// All rollup the functions, which do not rely on the previous sample
|
||||
// before the lookbehind window (aka prevValue), do not need silence interval.
|
||||
switch strings.ToLower(funcName) {
|
||||
case
|
||||
"absent_over_time",
|
||||
"avg_over_time",
|
||||
"count_eq_over_time",
|
||||
"count_gt_over_time",
|
||||
"count_le_over_time",
|
||||
"count_ne_over_time",
|
||||
"count_over_time",
|
||||
"default_rollup",
|
||||
"first_over_time",
|
||||
"histogram_over_time",
|
||||
"hoeffding_bound_lower",
|
||||
"hoeffding_bound_upper",
|
||||
"last_over_time",
|
||||
"mad_over_time",
|
||||
"max_over_time",
|
||||
"median_over_time",
|
||||
"min_over_time",
|
||||
"predict_linear",
|
||||
"present_over_time",
|
||||
"quantile_over_time",
|
||||
"quantiles_over_time",
|
||||
"range_over_time",
|
||||
"share_gt_over_time",
|
||||
"share_le_over_time",
|
||||
"share_eq_over_time",
|
||||
"stale_samples_over_time",
|
||||
"stddev_over_time",
|
||||
"stdvar_over_time",
|
||||
"sum_over_time",
|
||||
"tfirst_over_time",
|
||||
"timestamp",
|
||||
"timestamp_with_name",
|
||||
"tlast_over_time",
|
||||
"tmax_over_time",
|
||||
"tmin_over_time",
|
||||
"zscore_over_time":
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func evalRollupWithIncrementalAggregate(qt *querytracer.Tracer, funcName string, keepMetricNames bool,
|
||||
iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
|
||||
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) {
|
||||
|
||||
@@ -48,10 +48,7 @@ func (ure *UserReadableError) Error() string {
|
||||
func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
|
||||
if querystats.Enabled() {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
querystats.RegisterQuery(q, ec.End-ec.Start, startTime)
|
||||
ec.QueryStats.addExecutionTimeMsec(startTime)
|
||||
}()
|
||||
defer querystats.RegisterQuery(q, ec.End-ec.Start, startTime)
|
||||
}
|
||||
|
||||
ec.validate()
|
||||
|
||||
@@ -6910,30 +6910,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`outliers_iqr()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(outliers_iqr((
|
||||
alias(time(), "m1"),
|
||||
alias(time()*1.5, "m2"),
|
||||
alias(time()*10, "m3"),
|
||||
alias(time()*1.2, "m4"),
|
||||
alias(time()*0.1, "m5"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{100, 120, 140, 160, 180, 200},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("m5")
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10000, 12000, 14000, 16000, 18000, 20000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("m3")
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`outliers_mad(1)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `outliers_mad(1, (
|
||||
|
||||
12
app/vmselect/promql/parser_test.go
Normal file
12
app/vmselect/promql/parser_test.go
Normal file
@@ -0,0 +1,12 @@
|
||||
package promql
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestIsMetricSelectorWithRollup(t *testing.T) {
|
||||
childQuery, _, _ := IsMetricSelectorWithRollup("metric_name{label='value'}[365d] or vector(0)")
|
||||
if childQuery != "" {
|
||||
t.Fatalf("AAAAA: %v", childQuery)
|
||||
} else {
|
||||
t.Fatalf("BBBBB")
|
||||
}
|
||||
}
|
||||
@@ -62,7 +62,6 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||
"median_over_time": newRollupFuncOneArg(rollupMedian),
|
||||
"min_over_time": newRollupFuncOneArg(rollupMin),
|
||||
"mode_over_time": newRollupFuncOneArg(rollupModeOverTime),
|
||||
"outlier_iqr_over_time": newRollupFuncOneArg(rollupOutlierIQR),
|
||||
"predict_linear": newRollupPredictLinear,
|
||||
"present_over_time": newRollupFuncOneArg(rollupPresent),
|
||||
"quantile_over_time": newRollupQuantile,
|
||||
@@ -123,7 +122,6 @@ var rollupAggrFuncs = map[string]rollupFunc{
|
||||
"increases_over_time": rollupIncreases,
|
||||
"integrate": rollupIntegrate,
|
||||
"irate": rollupIderiv,
|
||||
"iqr_over_time": rollupOutlierIQR,
|
||||
"lag": rollupLag,
|
||||
"last_over_time": rollupLast,
|
||||
"lifetime": rollupLifetime,
|
||||
@@ -227,7 +225,6 @@ var rollupFuncsKeepMetricName = map[string]bool{
|
||||
"hoeffding_bound_lower": true,
|
||||
"hoeffding_bound_upper": true,
|
||||
"holt_winters": true,
|
||||
"iqr_over_time": true,
|
||||
"last_over_time": true,
|
||||
"max_over_time": true,
|
||||
"median_over_time": true,
|
||||
@@ -955,11 +952,11 @@ func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
|
||||
return rfa.prevValue
|
||||
}
|
||||
sf := sfs[rfa.idx]
|
||||
if sf < 0 || sf > 1 {
|
||||
if sf <= 0 || sf >= 1 {
|
||||
return nan
|
||||
}
|
||||
tf := tfs[rfa.idx]
|
||||
if tf < 0 || tf > 1 {
|
||||
if tf <= 0 || tf >= 1 {
|
||||
return nan
|
||||
}
|
||||
|
||||
@@ -1290,29 +1287,6 @@ func newRollupQuantiles(args []interface{}) (rollupFunc, error) {
|
||||
return rf, nil
|
||||
}
|
||||
|
||||
func rollupOutlierIQR(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
|
||||
// See Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
|
||||
values := rfa.values
|
||||
if len(values) < 2 {
|
||||
return nan
|
||||
}
|
||||
qs := getFloat64s()
|
||||
qs.A = quantiles(qs.A[:0], iqrPhis, values)
|
||||
q25 := qs.A[0]
|
||||
q75 := qs.A[1]
|
||||
iqr := 1.5 * (q75 - q25)
|
||||
putFloat64s(qs)
|
||||
|
||||
v := values[len(values)-1]
|
||||
if v > q75+iqr || v < q25-iqr {
|
||||
return v
|
||||
}
|
||||
return nan
|
||||
}
|
||||
|
||||
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
|
||||
if err := expectRollupArgsNum(args, 2); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -17,7 +17,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -203,74 +202,11 @@ func ResetRollupResultCache() {
|
||||
logger.Infof("rollupResult cache has been cleared")
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) GetInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter) []*timeseries {
|
||||
func (rrc *rollupResultCache) Get(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
|
||||
if qt.Enabled() {
|
||||
query := string(expr.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
qt = qt.NewChild("rollup cache get instant values: query=%s, window=%d, step=%d", query, window, step)
|
||||
defer qt.Done()
|
||||
}
|
||||
|
||||
// Obtain instant values from the cache
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalRollupResultCacheKeyForInstantValues(bb.B[:0], expr, window, step, etfss)
|
||||
tss, ok := rrc.getSeriesFromCache(qt, bb.B)
|
||||
if !ok || len(tss) == 0 {
|
||||
return nil
|
||||
}
|
||||
assertInstantValues(tss)
|
||||
qt.Printf("found %d series for time=%s", len(tss), storage.TimestampToHumanReadableFormat(tss[0].Timestamps[0]))
|
||||
return tss
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) PutInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter, tss []*timeseries) {
|
||||
if qt.Enabled() {
|
||||
query := string(expr.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
startStr := ""
|
||||
if len(tss) > 0 {
|
||||
startStr = storage.TimestampToHumanReadableFormat(tss[0].Timestamps[0])
|
||||
}
|
||||
qt = qt.NewChild("rollup cache put instant values: query=%s, window=%d, step=%d, series=%d, time=%s", query, window, step, len(tss), startStr)
|
||||
defer qt.Done()
|
||||
}
|
||||
if len(tss) == 0 {
|
||||
qt.Printf("do not cache empty series list")
|
||||
return
|
||||
}
|
||||
|
||||
assertInstantValues(tss)
|
||||
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalRollupResultCacheKeyForInstantValues(bb.B[:0], expr, window, step, etfss)
|
||||
_ = rrc.putSeriesToCache(qt, bb.B, step, tss)
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) DeleteInstantValues(qt *querytracer.Tracer, expr metricsql.Expr, window, step int64, etfss [][]storage.TagFilter) {
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalRollupResultCacheKeyForInstantValues(bb.B[:0], expr, window, step, etfss)
|
||||
if !rrc.putSeriesToCache(qt, bb.B, step, nil) {
|
||||
logger.Panicf("BUG: cannot store zero series to cache")
|
||||
}
|
||||
|
||||
if qt.Enabled() {
|
||||
query := string(expr.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
qt.Printf("rollup result cache delete instant values: query=%s, window=%d, step=%d", query, window, step)
|
||||
}
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
|
||||
if qt.Enabled() {
|
||||
query := string(expr.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
qt = qt.NewChild("rollup cache get series: query=%s, timeRange=%s, window=%d, step=%d", query, ec.timeRangeString(), window, ec.Step)
|
||||
query = bytesutil.LimitStringLen(query, 300)
|
||||
qt = qt.NewChild("rollup cache get: query=%s, timeRange=%s, step=%d, window=%d", query, ec.timeRangeString(), ec.Step, window)
|
||||
defer qt.Done()
|
||||
}
|
||||
if !ec.mayCache() {
|
||||
@@ -282,7 +218,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
metainfoBuf := rrc.c.Get(nil, bb.B)
|
||||
if len(metainfoBuf) == 0 {
|
||||
qt.Printf("nothing found")
|
||||
@@ -297,17 +233,31 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
qt.Printf("nothing found on the timeRange")
|
||||
return nil, ec.Start
|
||||
}
|
||||
|
||||
var ok bool
|
||||
bb.B = key.Marshal(bb.B[:0])
|
||||
tss, ok = rrc.getSeriesFromCache(qt, bb.B)
|
||||
if !ok {
|
||||
compressedResultBuf := resultBufPool.Get()
|
||||
defer resultBufPool.Put(compressedResultBuf)
|
||||
compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
|
||||
if len(compressedResultBuf.B) == 0 {
|
||||
mi.RemoveKey(key)
|
||||
metainfoBuf = mi.Marshal(metainfoBuf[:0])
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
rrc.c.Set(bb.B, metainfoBuf)
|
||||
qt.Printf("missing cache entry")
|
||||
return nil, ec.Start
|
||||
}
|
||||
// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
|
||||
// refers to the byte slice, so it cannot be returned to the resultBufPool.
|
||||
qt.Printf("load compressed entry from cache with size %d bytes", len(compressedResultBuf.B))
|
||||
resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
|
||||
}
|
||||
qt.Printf("unpack the entry into %d bytes", len(resultBuf))
|
||||
tss, err = unmarshalTimeseriesFast(resultBuf)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
|
||||
}
|
||||
qt.Printf("unmarshal %d series", len(tss))
|
||||
|
||||
// Extract values for the matching timestamps
|
||||
timestamps := tss[0].Timestamps
|
||||
@@ -316,10 +266,12 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
i++
|
||||
}
|
||||
if i == len(timestamps) {
|
||||
// no matches.
|
||||
qt.Printf("no datapoints found in the cached series on the given timeRange")
|
||||
return nil, ec.Start
|
||||
}
|
||||
if timestamps[i] != ec.Start {
|
||||
// The cached range doesn't cover the requested range.
|
||||
qt.Printf("cached series don't cover the given timeRange")
|
||||
return nil, ec.Start
|
||||
}
|
||||
@@ -330,7 +282,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
}
|
||||
j++
|
||||
if j <= i {
|
||||
qt.Printf("no matching samples for the given timeRange")
|
||||
// no matches.
|
||||
return nil, ec.Start
|
||||
}
|
||||
|
||||
@@ -351,21 +303,17 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
|
||||
var resultBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
|
||||
func (rrc *rollupResultCache) Put(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
|
||||
if qt.Enabled() {
|
||||
query := string(expr.AppendString(nil))
|
||||
query = stringsutil.LimitStringLen(query, 300)
|
||||
qt = qt.NewChild("rollup cache put series: query=%s, timeRange=%s, step=%d, window=%d, series=%d", query, ec.timeRangeString(), ec.Step, window, len(tss))
|
||||
query = bytesutil.LimitStringLen(query, 300)
|
||||
qt = qt.NewChild("rollup cache put: query=%s, timeRange=%s, step=%d, window=%d, series=%d", query, ec.timeRangeString(), ec.Step, window, len(tss))
|
||||
defer qt.Done()
|
||||
}
|
||||
if !ec.mayCache() {
|
||||
if len(tss) == 0 || !ec.mayCache() {
|
||||
qt.Printf("do not store series to cache, since it is disabled in the current context")
|
||||
return
|
||||
}
|
||||
if len(tss) == 0 {
|
||||
qt.Printf("do not store empty series list")
|
||||
return
|
||||
}
|
||||
|
||||
// Remove values up to currentTime - step - cacheTimestampOffset,
|
||||
// since these values may be added later.
|
||||
@@ -398,7 +346,7 @@ func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
metainfoBuf := bbPool.Get()
|
||||
defer bbPool.Put(metainfoBuf)
|
||||
|
||||
metainfoKey.B = marshalRollupResultCacheKeyForSeries(metainfoKey.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
metainfoKey.B = marshalRollupResultCacheKey(metainfoKey.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
metainfoBuf.B = rrc.c.Get(metainfoBuf.B[:0], metainfoKey.B)
|
||||
var mi rollupResultCacheMetainfo
|
||||
if len(metainfoBuf.B) > 0 {
|
||||
@@ -417,17 +365,31 @@ func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
return
|
||||
}
|
||||
|
||||
maxMarshaledSize := getRollupResultCacheSize() / 4
|
||||
resultBuf := resultBufPool.Get()
|
||||
defer resultBufPool.Put(resultBuf)
|
||||
resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
|
||||
if len(resultBuf.B) == 0 {
|
||||
tooBigRollupResults.Inc()
|
||||
qt.Printf("cannot store series in the cache, since they would occupy more than %d bytes", maxMarshaledSize)
|
||||
return
|
||||
}
|
||||
if qt.Enabled() {
|
||||
startString := storage.TimestampToHumanReadableFormat(start)
|
||||
endString := storage.TimestampToHumanReadableFormat(end)
|
||||
qt.Printf("marshal %d series on a timeRange=[%s..%s] into %d bytes", len(tss), startString, endString, len(resultBuf.B))
|
||||
}
|
||||
compressedResultBuf := resultBufPool.Get()
|
||||
defer resultBufPool.Put(compressedResultBuf)
|
||||
compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)
|
||||
qt.Printf("compress %d bytes into %d bytes", len(resultBuf.B), len(compressedResultBuf.B))
|
||||
|
||||
var key rollupResultCacheKey
|
||||
key.prefix = rollupResultCacheKeyPrefix
|
||||
key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
|
||||
|
||||
bb := bbPool.Get()
|
||||
bb.B = key.Marshal(bb.B[:0])
|
||||
ok := rrc.putSeriesToCache(qt, bb.B, ec.Step, tss)
|
||||
bbPool.Put(bb)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
rollupResultKey := key.Marshal(nil)
|
||||
rrc.c.SetBig(rollupResultKey, compressedResultBuf.B)
|
||||
qt.Printf("store %d bytes in the cache", len(compressedResultBuf.B))
|
||||
|
||||
mi.AddKey(key, timestamps[0], timestamps[len(timestamps)-1])
|
||||
metainfoBuf.B = mi.Marshal(metainfoBuf.B[:0])
|
||||
@@ -439,52 +401,6 @@ var (
|
||||
rollupResultCacheKeySuffix = uint64(time.Now().UnixNano())
|
||||
)
|
||||
|
||||
func (rrc *rollupResultCache) getSeriesFromCache(qt *querytracer.Tracer, key []byte) ([]*timeseries, bool) {
|
||||
compressedResultBuf := resultBufPool.Get()
|
||||
compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], key)
|
||||
if len(compressedResultBuf.B) == 0 {
|
||||
qt.Printf("nothing found in the cache")
|
||||
resultBufPool.Put(compressedResultBuf)
|
||||
return nil, false
|
||||
}
|
||||
qt.Printf("load compressed entry from cache with size %d bytes", len(compressedResultBuf.B))
|
||||
// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
|
||||
// refers to the byte slice, so it cannot be re-used.
|
||||
resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
|
||||
}
|
||||
resultBufPool.Put(compressedResultBuf)
|
||||
qt.Printf("unpack the entry into %d bytes", len(resultBuf))
|
||||
tss, err := unmarshalTimeseriesFast(resultBuf)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
|
||||
}
|
||||
qt.Printf("unmarshal %d series", len(tss))
|
||||
return tss, true
|
||||
}
|
||||
|
||||
func (rrc *rollupResultCache) putSeriesToCache(qt *querytracer.Tracer, key []byte, step int64, tss []*timeseries) bool {
|
||||
maxMarshaledSize := getRollupResultCacheSize() / 4
|
||||
resultBuf := resultBufPool.Get()
|
||||
defer resultBufPool.Put(resultBuf)
|
||||
resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, step)
|
||||
if len(resultBuf.B) == 0 {
|
||||
tooBigRollupResults.Inc()
|
||||
qt.Printf("cannot store %d series in the cache, since they would occupy more than %d bytes", len(tss), maxMarshaledSize)
|
||||
return false
|
||||
}
|
||||
qt.Printf("marshal %d series into %d bytes", len(tss), len(resultBuf.B))
|
||||
compressedResultBuf := resultBufPool.Get()
|
||||
defer resultBufPool.Put(compressedResultBuf)
|
||||
compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)
|
||||
qt.Printf("compress %d bytes into %d bytes", len(resultBuf.B), len(compressedResultBuf.B))
|
||||
|
||||
rrc.c.SetBig(key, compressedResultBuf.B)
|
||||
qt.Printf("store %d bytes in the cache", len(compressedResultBuf.B))
|
||||
return true
|
||||
}
|
||||
|
||||
func newRollupResultCacheKeyPrefix() uint64 {
|
||||
var buf [8]byte
|
||||
if _, err := rand.Read(buf[:]); err != nil {
|
||||
@@ -523,36 +439,14 @@ func mustSaveRollupResultCacheKeyPrefix(path string) {
|
||||
var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
|
||||
|
||||
// Increment this value every time the format of the cache changes.
|
||||
const rollupResultCacheVersion = 10
|
||||
const rollupResultCacheVersion = 9
|
||||
|
||||
const (
|
||||
rollupResultCacheTypeSeries = 0
|
||||
rollupResultCacheTypeInstantValues = 1
|
||||
)
|
||||
|
||||
func marshalRollupResultCacheKeyForSeries(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
|
||||
func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
|
||||
dst = append(dst, rollupResultCacheVersion)
|
||||
dst = encoding.MarshalUint64(dst, rollupResultCacheKeyPrefix)
|
||||
dst = append(dst, rollupResultCacheTypeSeries)
|
||||
dst = encoding.MarshalInt64(dst, window)
|
||||
dst = encoding.MarshalInt64(dst, step)
|
||||
dst = marshalTagFiltersForRollupResultCacheKey(dst, etfs)
|
||||
dst = expr.AppendString(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
func marshalRollupResultCacheKeyForInstantValues(dst []byte, expr metricsql.Expr, window, step int64, etfs [][]storage.TagFilter) []byte {
|
||||
dst = append(dst, rollupResultCacheVersion)
|
||||
dst = encoding.MarshalUint64(dst, rollupResultCacheKeyPrefix)
|
||||
dst = append(dst, rollupResultCacheTypeInstantValues)
|
||||
dst = encoding.MarshalInt64(dst, window)
|
||||
dst = encoding.MarshalInt64(dst, step)
|
||||
dst = marshalTagFiltersForRollupResultCacheKey(dst, etfs)
|
||||
dst = expr.AppendString(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
func marshalTagFiltersForRollupResultCacheKey(dst []byte, etfs [][]storage.TagFilter) []byte {
|
||||
for i, etf := range etfs {
|
||||
for _, f := range etf {
|
||||
dst = f.Marshal(dst)
|
||||
@@ -567,15 +461,12 @@ func marshalTagFiltersForRollupResultCacheKey(dst []byte, etfs [][]storage.TagFi
|
||||
// mergeTimeseries concatenates b with a and returns the result.
|
||||
//
|
||||
// Preconditions:
|
||||
// - a mustn't intersect with b by timestamps.
|
||||
// - a mustn't intersect with b.
|
||||
// - a timestamps must be smaller than b timestamps.
|
||||
//
|
||||
// Postconditions:
|
||||
// - a and b cannot be used after returning from the call.
|
||||
func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, ec *EvalConfig) ([]*timeseries, error) {
|
||||
qt = qt.NewChild("merge series len(a)=%d, len(b)=%d", len(a), len(b))
|
||||
defer qt.Done()
|
||||
|
||||
func mergeTimeseries(a, b []*timeseries, bStart int64, ec *EvalConfig) []*timeseries {
|
||||
sharedTimestamps := ec.getSharedTimestamps()
|
||||
if bStart == ec.Start {
|
||||
// Nothing to merge - b covers all the time range.
|
||||
@@ -587,7 +478,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
|
||||
logger.Panicf("BUG: unexpected number of values in b; got %d; want %d", len(tsB.Values), len(tsB.Timestamps))
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
return b
|
||||
}
|
||||
|
||||
m := make(map[string]*timeseries, len(a))
|
||||
@@ -595,9 +486,6 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
|
||||
defer bbPool.Put(bb)
|
||||
for _, ts := range a {
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
if _, ok := m[string(bb.B)]; ok {
|
||||
return nil, fmt.Errorf("duplicate series found: %s", &ts.MetricName)
|
||||
}
|
||||
m[string(bb.B)] = ts
|
||||
}
|
||||
|
||||
@@ -624,8 +512,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
|
||||
}
|
||||
tmp.Values = append(tmp.Values, tsB.Values...)
|
||||
if len(tmp.Values) != len(tmp.Timestamps) {
|
||||
logger.Panicf("BUG: unexpected values after merging new values; got %d; want %d; len(a.Values)=%d; len(b.Values)=%d",
|
||||
len(tmp.Values), len(tmp.Timestamps), len(tsA.Values), len(tsB.Values))
|
||||
logger.Panicf("BUG: unexpected values after merging new values; got %d; want %d", len(tmp.Values), len(tmp.Timestamps))
|
||||
}
|
||||
rvs = append(rvs, &tmp)
|
||||
}
|
||||
@@ -649,8 +536,7 @@ func mergeTimeseries(qt *querytracer.Tracer, a, b []*timeseries, bStart int64, e
|
||||
}
|
||||
rvs = append(rvs, &tmp)
|
||||
}
|
||||
qt.Printf("resulting series=%d", len(rvs))
|
||||
return rvs, nil
|
||||
return rvs
|
||||
}
|
||||
|
||||
type rollupResultCacheMetainfo struct {
|
||||
|
||||
@@ -61,7 +61,7 @@ func TestRollupResultCache(t *testing.T) {
|
||||
|
||||
// Try obtaining an empty value.
|
||||
t.Run("empty", func(t *testing.T) {
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != ec.Start {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
|
||||
}
|
||||
@@ -79,8 +79,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1400 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
|
||||
}
|
||||
@@ -100,8 +100,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, ae, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, ae, window)
|
||||
rollupResultCacheV.Put(nil, ec, ae, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, ae, window)
|
||||
if newStart != 1400 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
|
||||
}
|
||||
@@ -123,8 +123,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{333, 0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1000 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
|
||||
}
|
||||
@@ -142,8 +142,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1000 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
|
||||
}
|
||||
@@ -161,8 +161,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1000 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
|
||||
}
|
||||
@@ -180,8 +180,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1000 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
|
||||
}
|
||||
@@ -199,8 +199,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2, 3, 4, 5, 6, 7},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 2200 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
|
||||
}
|
||||
@@ -222,8 +222,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{1, 2, 3, 4, 5, 6},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 2200 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
|
||||
}
|
||||
@@ -247,8 +247,8 @@ func TestRollupResultCache(t *testing.T) {
|
||||
}
|
||||
tss = append(tss, ts)
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
tssResult, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss)
|
||||
tssResult, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 2200 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
|
||||
}
|
||||
@@ -276,10 +276,10 @@ func TestRollupResultCache(t *testing.T) {
|
||||
Values: []float64{0, 1, 2},
|
||||
},
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss1)
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss2)
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss3)
|
||||
tss, newStart := rollupResultCacheV.GetSeries(nil, ec, fe, window)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss1)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss2)
|
||||
rollupResultCacheV.Put(nil, ec, fe, window, tss3)
|
||||
tss, newStart := rollupResultCacheV.Get(nil, ec, fe, window)
|
||||
if newStart != 1400 {
|
||||
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
|
||||
}
|
||||
@@ -311,10 +311,7 @@ func TestMergeTimeseries(t *testing.T) {
|
||||
Values: []float64{1, 2, 3, 4, 5, 6},
|
||||
},
|
||||
}
|
||||
tss, err := mergeTimeseries(nil, a, b, 1000, ec)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tss := mergeTimeseries(a, b, 1000, ec)
|
||||
tssExpected := []*timeseries{
|
||||
{
|
||||
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
@@ -331,10 +328,7 @@ func TestMergeTimeseries(t *testing.T) {
|
||||
Values: []float64{3, 4, 5, 6},
|
||||
},
|
||||
}
|
||||
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tss := mergeTimeseries(a, b, bStart, ec)
|
||||
tssExpected := []*timeseries{
|
||||
{
|
||||
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
@@ -351,10 +345,7 @@ func TestMergeTimeseries(t *testing.T) {
|
||||
},
|
||||
}
|
||||
b := []*timeseries{}
|
||||
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tss := mergeTimeseries(a, b, bStart, ec)
|
||||
tssExpected := []*timeseries{
|
||||
{
|
||||
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
@@ -376,10 +367,7 @@ func TestMergeTimeseries(t *testing.T) {
|
||||
Values: []float64{3, 4, 5, 6},
|
||||
},
|
||||
}
|
||||
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tss := mergeTimeseries(a, b, bStart, ec)
|
||||
tssExpected := []*timeseries{
|
||||
{
|
||||
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
@@ -403,10 +391,7 @@ func TestMergeTimeseries(t *testing.T) {
|
||||
},
|
||||
}
|
||||
b[0].MetricName.MetricGroup = []byte("foo")
|
||||
tss, err := mergeTimeseries(nil, a, b, bStart, ec)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tss := mergeTimeseries(a, b, bStart, ec)
|
||||
tssExpected := []*timeseries{
|
||||
{
|
||||
MetricName: storage.MetricName{
|
||||
|
||||
@@ -12,35 +12,6 @@ var (
|
||||
testTimestamps = []int64{5, 15, 24, 36, 49, 60, 78, 80, 97, 115, 120, 130}
|
||||
)
|
||||
|
||||
func TestRollupOutlierIQR(t *testing.T) {
|
||||
f := func(values []float64, resultExpected float64) {
|
||||
t.Helper()
|
||||
rfa := &rollupFuncArg{
|
||||
values: values,
|
||||
timestamps: nil,
|
||||
}
|
||||
result := rollupOutlierIQR(rfa)
|
||||
if math.IsNaN(result) {
|
||||
if !math.IsNaN(resultExpected) {
|
||||
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
} else {
|
||||
if math.IsNaN(resultExpected) {
|
||||
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f([]float64{1, 2, 3, 4, 5}, nan)
|
||||
f([]float64{1, 2, 3, 4, 7}, nan)
|
||||
f([]float64{1, 2, 3, 4, 8}, 8)
|
||||
f([]float64{1, 2, 3, 4, -2}, nan)
|
||||
f([]float64{1, 2, 3, 4, -3}, -3)
|
||||
}
|
||||
|
||||
func TestRollupIderivDuplicateTimestamps(t *testing.T) {
|
||||
rfa := &rollupFuncArg{
|
||||
values: []float64{1, 2, 3, 4, 5},
|
||||
@@ -189,7 +160,7 @@ func TestDerivValues(t *testing.T) {
|
||||
testRowsEqual(t, values, timestamps, valuesExpected, timestamps)
|
||||
}
|
||||
|
||||
func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected float64) {
|
||||
func testRollupFuncWithValues(t *testing.T, funcName string, args []interface{}, vInput []float64, vTimestamps []int64, vExpected float64) {
|
||||
t.Helper()
|
||||
nrf := getRollupFunc(funcName)
|
||||
if nrf == nil {
|
||||
@@ -201,9 +172,11 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
|
||||
}
|
||||
var rfa rollupFuncArg
|
||||
rfa.prevValue = nan
|
||||
rfa.realPrevValue = nan
|
||||
rfa.realNextValue = nan
|
||||
rfa.prevTimestamp = 0
|
||||
rfa.values = append(rfa.values, testValues...)
|
||||
rfa.timestamps = append(rfa.timestamps, testTimestamps...)
|
||||
rfa.values = append(rfa.values, vInput...)
|
||||
rfa.timestamps = append(rfa.timestamps, vTimestamps...)
|
||||
rfa.window = rfa.timestamps[len(rfa.timestamps)-1] - rfa.timestamps[0]
|
||||
if rollupFuncsRemoveCounterResets[funcName] {
|
||||
removeCounterResets(rfa.values)
|
||||
@@ -215,9 +188,6 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
|
||||
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
|
||||
}
|
||||
} else {
|
||||
if math.IsNaN(v) {
|
||||
t.Fatalf("unexpected value; got %v want %v", v, vExpected)
|
||||
}
|
||||
eps := math.Abs(v - vExpected)
|
||||
if eps > 1e-14 {
|
||||
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
|
||||
@@ -226,6 +196,10 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
|
||||
}
|
||||
}
|
||||
|
||||
func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected float64) {
|
||||
testRollupFuncWithValues(t, funcName, args, testValues, testTimestamps, vExpected)
|
||||
}
|
||||
|
||||
func TestRollupDurationOverTime(t *testing.T) {
|
||||
f := func(maxInterval, dExpected float64) {
|
||||
t.Helper()
|
||||
@@ -470,12 +444,12 @@ func TestRollupHoltWinters(t *testing.T) {
|
||||
}
|
||||
|
||||
f(-1, 0.5, nan)
|
||||
f(0, 0.5, -856)
|
||||
f(1, 0.5, 34)
|
||||
f(0, 0.5, nan)
|
||||
f(1, 0.5, nan)
|
||||
f(2, 0.5, nan)
|
||||
f(0.5, -1, nan)
|
||||
f(0.5, 0, -54.1474609375)
|
||||
f(0.5, 1, 25.25)
|
||||
f(0.5, 0, nan)
|
||||
f(0.5, 1, nan)
|
||||
f(0.5, 2, nan)
|
||||
f(0.5, 0.5, 34.97794532775879)
|
||||
f(0.1, 0.5, -131.30529492371622)
|
||||
@@ -546,7 +520,6 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
|
||||
f("increase", 398)
|
||||
f("increase_prometheus", 275)
|
||||
f("irate", 0)
|
||||
f("outlier_iqr_over_time", nan)
|
||||
f("rate", 2200)
|
||||
f("resets", 5)
|
||||
f("range_over_time", 111)
|
||||
@@ -1507,3 +1480,121 @@ func TestRollupDelta(t *testing.T) {
|
||||
f(1, nan, nan, nil, 0)
|
||||
f(100, nan, nan, nil, 0)
|
||||
}
|
||||
|
||||
func TestIncrease(t *testing.T) {
|
||||
f := func(funcName string, vInput []float64, vExpected float64) {
|
||||
t.Helper()
|
||||
var me metricsql.MetricExpr
|
||||
args := []interface{}{&metricsql.RollupExpr{Expr: &me}}
|
||||
testRollupFuncWithValues(t, funcName, args, vInput, []int64{1, 2}, vExpected)
|
||||
}
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 100},
|
||||
0,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 90},
|
||||
0,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 88},
|
||||
0,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 87},
|
||||
|
||||
187,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 187},
|
||||
|
||||
187,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 87, 200, 287},
|
||||
|
||||
387,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 187, 200, 287},
|
||||
|
||||
287,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 87, 200, 87},
|
||||
|
||||
387,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 87, 200, 187},
|
||||
|
||||
300,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 87, 200, 177},
|
||||
|
||||
300,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 13},
|
||||
113,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 9},
|
||||
9,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 1},
|
||||
1,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, 0},
|
||||
0,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, -1},
|
||||
-1,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, -10},
|
||||
90,
|
||||
)
|
||||
|
||||
f(
|
||||
"increase",
|
||||
[]float64{100, -90},
|
||||
10,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -79,10 +79,7 @@ var timeseriesPool sync.Pool
|
||||
|
||||
func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
|
||||
if len(tss) == 0 {
|
||||
// marshal zero timeseries and zero timestamps
|
||||
dst = encoding.MarshalUint64(dst, 0)
|
||||
dst = encoding.MarshalUint64(dst, 0)
|
||||
return dst
|
||||
logger.Panicf("BUG: tss cannot be empty")
|
||||
}
|
||||
|
||||
// timestamps are stored only once for all the tss, since they must be identical
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//go:build go1.15
|
||||
// +build go1.15
|
||||
|
||||
package promql
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"files": {
|
||||
"main.css": "./static/css/main.349e6522.css",
|
||||
"main.js": "./static/js/main.c93073e5.js",
|
||||
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
|
||||
"main.css": "./static/css/main.d9ac05de.css",
|
||||
"main.js": "./static/js/main.70434a4f.js",
|
||||
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
|
||||
"index.html": "./index.html"
|
||||
},
|
||||
"entrypoints": [
|
||||
"static/css/main.349e6522.css",
|
||||
"static/js/main.c93073e5.js"
|
||||
"static/css/main.d9ac05de.css",
|
||||
"static/js/main.70434a4f.js"
|
||||
]
|
||||
}
|
||||
BIN
app/vmselect/vmui/favicon.ico
Normal file
BIN
app/vmselect/vmui/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -1 +1 @@
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.c93073e5.js"></script><link href="./static/css/main.349e6522.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.70434a4f.js"></script><link href="./static/css/main.d9ac05de.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
File diff suppressed because one or more lines are too long
1
app/vmselect/vmui/static/css/main.d9ac05de.css
Normal file
1
app/vmselect/vmui/static/css/main.d9ac05de.css
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
app/vmselect/vmui/static/js/main.70434a4f.js
Normal file
2
app/vmselect/vmui/static/js/main.70434a4f.js
Normal file
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
|
||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
||||
|
||||
/**
|
||||
* @remix-run/router v1.10.0
|
||||
* @remix-run/router v1.7.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router DOM v6.17.0
|
||||
* React Router DOM v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
@@ -29,7 +29,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* React Router v6.17.0
|
||||
* React Router v6.14.2
|
||||
*
|
||||
* Copyright (c) Remix Software Inc.
|
||||
*
|
||||
File diff suppressed because one or more lines are too long
@@ -1,11 +1,11 @@
|
||||
---
|
||||
sort: 23
|
||||
weight: 23
|
||||
sort: 14
|
||||
weight: 14
|
||||
title: MetricsQL
|
||||
menu:
|
||||
docs:
|
||||
parent: 'victoriametrics'
|
||||
weight: 23
|
||||
parent: "victoriametrics"
|
||||
weight: 14
|
||||
aliases:
|
||||
- /ExtendedPromQL.html
|
||||
- /MetricsQL.html
|
||||
@@ -21,8 +21,7 @@ However, there are some [intentional differences](https://medium.com/@romanhavro
|
||||
|
||||
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
|
||||
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
|
||||
and introduction into [basic querying via MetricsQL](https://docs.victoriametrics.com/keyConcepts.html#metricsql).
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
|
||||
|
||||
The following functionality is implemented differently in MetricsQL compared to PromQL. This improves user experience:
|
||||
|
||||
@@ -110,7 +109,7 @@ The list of MetricsQL features on top of PromQL:
|
||||
* [histogram_quantile](#histogram_quantile) accepts optional third arg - `boundsLabel`.
|
||||
In this case it returns `lower` and `upper` bounds for the estimated percentile.
|
||||
See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`. See also [drop_empty_series](#drop_empty_series).
|
||||
* `default` binary operator. `q1 default q2` fills gaps in `q1` with the corresponding values from `q2`.
|
||||
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
|
||||
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
|
||||
* `WITH` templates. This feature simplifies writing and managing complex queries.
|
||||
@@ -532,7 +531,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
|
||||
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
|
||||
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
||||
|
||||
See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
See also [mad](#mad) and [range_mad](#range_mad).
|
||||
|
||||
#### max_over_time
|
||||
|
||||
@@ -562,18 +561,6 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
|
||||
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
|
||||
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.
|
||||
|
||||
#### outlier_iqr_over_time
|
||||
|
||||
`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
|
||||
if its value is either smaller than the `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.
|
||||
|
||||
The `outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
|
||||
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last 24 hours.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr).
|
||||
|
||||
#### predict_linear
|
||||
|
||||
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
|
||||
@@ -878,7 +865,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
|
||||
|
||||
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
|
||||
### Transform functions
|
||||
@@ -1068,17 +1055,6 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
|
||||
This function is supported by PromQL. See also [rad](#rad).
|
||||
|
||||
#### drop_empty_series
|
||||
|
||||
`drop_empty_series(q)` is a [transform function](#transform-functions), which drops empty series from `q`.
|
||||
|
||||
This function can be used when `default` operator should be applied only to non-empty series. For example,
|
||||
`drop_empty_series(temperature < 30) default 42` returns series, which have at least a single sample smaller than 30 on the selected time range,
|
||||
while filling gaps in the returned series with 42.
|
||||
|
||||
On the other hand `(temperature < 30) default 40` returns all the `temperature` series, even if they have no samples smaller than 30,
|
||||
by replacing all the values bigger or equal to 30 with 40.
|
||||
|
||||
#### end
|
||||
|
||||
`end()` is a [transform function](#transform-functions), which returns the unix timestamp in seconds for the last point.
|
||||
@@ -1615,7 +1591,7 @@ which maps `label` values from `src_*` to `dst*` for all the time series returne
|
||||
which drops time series from `q` with `label` not matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
|
||||
See also [label_mismatch](#label_mismatch).
|
||||
|
||||
#### label_mismatch
|
||||
|
||||
@@ -1623,7 +1599,7 @@ See also [label_mismatch](#label_mismatch) and [labels_equal](#labels_equal).
|
||||
which drops time series from `q` with `label` matching the given `regexp`.
|
||||
This function can be useful after [rollup](#rollup)-like functions, which may return multiple time series for every input series.
|
||||
|
||||
See also [label_match](#label_match) and [labels_equal](#labels_equal).
|
||||
See also [label_match](#label_match).
|
||||
|
||||
#### label_move
|
||||
|
||||
@@ -1666,30 +1642,23 @@ for the given `label` for every time series returned by `q`.
|
||||
For example, if `label_value(foo, "bar")` is applied to `foo{bar="1.234"}`, then it will return a time series
|
||||
`foo{bar="1.234"}` with `1.234` value. Function will return no data for non-numeric label values.
|
||||
|
||||
#### labels_equal
|
||||
|
||||
`labels_equal(q, "label1", "label2", ...)` is [label manipulation function](#label-manipulation-functions), which returns `q` series with identical values for the listed labels
|
||||
"label1", "label2", etc.
|
||||
|
||||
See also [label_match](#label_match) and [label_mismatch](#label_mismatch).
|
||||
|
||||
#### sort_by_label
|
||||
|
||||
`sort_by_label(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
`sort_by_label(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label_desc](#sort_by_label_desc) and [sort_by_label_numeric](#sort_by_label_numeric).
|
||||
|
||||
#### sort_by_label_desc
|
||||
|
||||
`sort_by_label_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
`sort_by_label_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order by the given set of labels.
|
||||
For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series.
|
||||
|
||||
See also [sort_by_label](#sort_by_label) and [sort_by_label_numeric_desc](#sort_by_label_numeric_desc).
|
||||
|
||||
#### sort_by_label_numeric
|
||||
|
||||
`sort_by_label_numeric(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
`sort_by_label_numeric(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in ascending order by the given set of labels
|
||||
using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series
|
||||
in the following order of `bar` label values: `1`, `2`, `15` and `101`.
|
||||
@@ -1698,7 +1667,7 @@ See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_
|
||||
|
||||
#### sort_by_label_numeric_desc
|
||||
|
||||
`sort_by_label_numeric_desc(q, "label1", ... "labelN")` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
`sort_by_label_numeric_desc(q, label1, ... labelN)` is [label manipulation function](#label-manipulation-functions), which sorts series in descending order
|
||||
by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html).
|
||||
For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")`
|
||||
would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
|
||||
@@ -1870,33 +1839,20 @@ This function is supported by PromQL.
|
||||
`mode(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
|
||||
#### outliers_iqr
|
||||
|
||||
`outliers_iqr(q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
|
||||
outside e.g. [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
|
||||
comparing to other time series at the given point, where:
|
||||
- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
|
||||
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.
|
||||
|
||||
The `outliers_iqr()` is useful for detecting anomalous series in the group of series. For example, `outliers_iqr(temperature) by (country)` returns
|
||||
per-country series with anomalous outlier values comparing to the rest of per-country series.
|
||||
|
||||
See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
|
||||
|
||||
#### outliers_mad
|
||||
|
||||
`outliers_mad(tolerance, q)` is [aggregate function](#aggregate-functions), which returns time series from `q` with at least
|
||||
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
|
||||
E.g. it returns time series with at least a single point below `median(q) - mad(q)` or a single point above `median(q) + mad(q)`.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).
|
||||
See also [outliersk](#outliersk) and [mad](#mad).
|
||||
|
||||
#### outliersk
|
||||
|
||||
`outliersk(k, q)` is [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
|
||||
out of time series returned by `q`.
|
||||
|
||||
See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
|
||||
See also [outliers_mad](#outliers_mad).
|
||||
|
||||
#### quantile
|
||||
|
||||
@@ -2016,7 +1972,7 @@ See also [bottomk_min](#bottomk_min).
|
||||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
This function is useful for detecting anomalies in the group of related time series.
|
||||
|
||||
See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).
|
||||
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
## Subqueries
|
||||
|
||||
2
app/vmui/.gitignore
vendored
2
app/vmui/.gitignore
vendored
@@ -105,3 +105,5 @@ dist
|
||||
|
||||
# WebStorm etc
|
||||
.idea/
|
||||
|
||||
MetricsQL.md
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.21.4 as build-web-stage
|
||||
FROM golang:1.21.3 as build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user