mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-09 11:54:31 +03:00
Compare commits
106 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3bfa41a95c | ||
|
|
90670cb55e | ||
|
|
303905cd84 | ||
|
|
36fa3078c2 | ||
|
|
95942f1ac6 | ||
|
|
b768bc9a6a | ||
|
|
de59703a16 | ||
|
|
b4afe562c1 | ||
|
|
0224071ebe | ||
|
|
fcf57f9883 | ||
|
|
6954d0edb7 | ||
|
|
fb967ae6c8 | ||
|
|
2c18548e08 | ||
|
|
5f61d43db9 | ||
|
|
eeadfccdc5 | ||
|
|
d7c1ff8b0c | ||
|
|
1f3fd93b58 | ||
|
|
66af7e40f3 | ||
|
|
491b31b369 | ||
|
|
4b84c592e9 | ||
|
|
a596aec82c | ||
|
|
7b8008e0bd | ||
|
|
6d3567d65c | ||
|
|
9ef5935552 | ||
|
|
b80e6b4d56 | ||
|
|
5f9c23226a | ||
|
|
ac43075cc9 | ||
|
|
3157fb0186 | ||
|
|
e48822942d | ||
|
|
77bea69fab | ||
|
|
24461153bf | ||
|
|
00e897119f | ||
|
|
a9a7a7175e | ||
|
|
a9b83bf512 | ||
|
|
a87ca3bdf0 | ||
|
|
1c5d14a2eb | ||
|
|
a714568374 | ||
|
|
364db13c9c | ||
|
|
01e33be34a | ||
|
|
78ff5f2aa5 | ||
|
|
2dc5593b75 | ||
|
|
9ebc937685 | ||
|
|
fe57d46687 | ||
|
|
6cc6ec6d2e | ||
|
|
5454b518a6 | ||
|
|
5ecb50d7c2 | ||
|
|
851946af1e | ||
|
|
2de76bca96 | ||
|
|
94ad531bfe | ||
|
|
936fb0eac3 | ||
|
|
43375df923 | ||
|
|
43bbffebb3 | ||
|
|
79fb595732 | ||
|
|
546d26523c | ||
|
|
f41e6a7bd9 | ||
|
|
830538e290 | ||
|
|
5d1537a395 | ||
|
|
600490131f | ||
|
|
bd4c6d21dd | ||
|
|
95da8d410c | ||
|
|
bcec5c5429 | ||
|
|
467279acd2 | ||
|
|
e0d213f82b | ||
|
|
2fd2dec5eb | ||
|
|
071fdf5518 | ||
|
|
30b401ebbf | ||
|
|
a59a7bcc5e | ||
|
|
ccb887c0f6 | ||
|
|
6f7f64f757 | ||
|
|
426a0567c4 | ||
|
|
6e2f6574b8 | ||
|
|
c1de3f67b4 | ||
|
|
8a25c1ed71 | ||
|
|
067c7afebc | ||
|
|
ac35635b71 | ||
|
|
78863d7066 | ||
|
|
c64f003cfb | ||
|
|
4718a5d951 | ||
|
|
257521a634 | ||
|
|
6a75c95194 | ||
|
|
01d7d799dc | ||
|
|
0b76c27fa1 | ||
|
|
2e4e202c2b | ||
|
|
2814b1490f | ||
|
|
90b4a6dd12 | ||
|
|
2eed6c393f | ||
|
|
948f8b6b5f | ||
|
|
8fca5f2819 | ||
|
|
7c9405f53d | ||
|
|
9f8cc8ae1b | ||
|
|
90de3086b3 | ||
|
|
830d5fb1e0 | ||
|
|
66d8086a5e | ||
|
|
a30c98c0bc | ||
|
|
4de6c6bbf0 | ||
|
|
ded0c0d3c7 | ||
|
|
7d73623c69 | ||
|
|
e62afc7366 | ||
|
|
0681b4c27a | ||
|
|
f86947d55c | ||
|
|
f94a090020 | ||
|
|
8064775c02 | ||
|
|
520a704606 | ||
|
|
105f0c78d9 | ||
|
|
b099d84271 | ||
|
|
407bdbf2b9 |
13
.github/workflows/main.yml
vendored
13
.github/workflows/main.yml
vendored
@@ -14,18 +14,19 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v1
|
||||
uses: actions/setup-go@master
|
||||
with:
|
||||
go-version: 1.13
|
||||
go-version: 1.14
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v1
|
||||
- name: Dependencies
|
||||
env:
|
||||
GO111MODULE: off
|
||||
GO111MODULE: on
|
||||
run: |
|
||||
go get -v golang.org/x/lint/golint
|
||||
go get -u golang.org/x/lint/golint
|
||||
go get -u github.com/kisielk/errcheck
|
||||
go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Build
|
||||
env:
|
||||
GO111MODULE: on
|
||||
|
||||
10
Makefile
10
Makefile
@@ -13,6 +13,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date
|
||||
all: \
|
||||
victoria-metrics-prod \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
|
||||
@@ -25,17 +26,20 @@ clean:
|
||||
publish: \
|
||||
publish-victoria-metrics \
|
||||
publish-vmagent \
|
||||
publish-vmalert \
|
||||
publish-vmbackup \
|
||||
publish-vmrestore
|
||||
|
||||
package: \
|
||||
package-victoria-metrics \
|
||||
package-vmagent \
|
||||
package-vmalert \
|
||||
package-vmbackup \
|
||||
package-vmrestore
|
||||
|
||||
vmutils: \
|
||||
vmagent \
|
||||
vmalert \
|
||||
vmbackup \
|
||||
vmrestore
|
||||
|
||||
@@ -49,9 +53,10 @@ release-victoria-metrics: victoria-metrics-prod
|
||||
|
||||
release-vmutils: \
|
||||
vmagent-prod \
|
||||
vmalert-prod \
|
||||
vmbackup-prod \
|
||||
vmrestore-prod
|
||||
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmbackup-prod vmrestore-prod && \
|
||||
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmbackup-prod vmrestore-prod && \
|
||||
sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
|
||||
|
||||
pprof-cpu:
|
||||
@@ -78,6 +83,7 @@ errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
|
||||
@@ -130,7 +136,7 @@ install-qtc:
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck
|
||||
golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck --timeout 2m
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
|
||||
|
||||
58
README.md
58
README.md
@@ -27,6 +27,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
* [MHI Vestas Offshore Wind](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#mhi-vestas-offshore-wind)
|
||||
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
|
||||
* [Brandwatch](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#brandwatch)
|
||||
* [Adsterra](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adsterra)
|
||||
* [ARNES](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#arnes)
|
||||
|
||||
|
||||
## Prominent features
|
||||
@@ -206,6 +208,10 @@ Read more about tuning remote write for Prometheus [here](https://prometheus.io/
|
||||
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
|
||||
since the previous versions may have issues with `remote_write`.
|
||||
|
||||
Take a look also at [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md),
|
||||
which can be used as a faster and less resource-hungry alternative to Prometheus in certain cases.
|
||||
|
||||
|
||||
### Grafana setup
|
||||
|
||||
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
|
||||
@@ -253,6 +259,9 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
|
||||
|
||||
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
|
||||
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
|
||||
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
|
||||
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
|
||||
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
|
||||
|
||||
In the future other `*_sd_config` types will be supported.
|
||||
|
||||
@@ -270,7 +279,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
|
||||
|
||||
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
|
||||
|
||||
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
|
||||
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag
|
||||
and stream plain Influx line protocol data to the configured TCP and/or UDP addresses.
|
||||
|
||||
VictoriaMetrics maps Influx data using the following rules:
|
||||
|
||||
@@ -441,10 +451,10 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
|
||||
|
||||
* `<column_pos>` is the position of the CSV column (field). Column numbering starts from 1. The order of parsing rules may be arbitrary.
|
||||
* `<type>` describes the column type. Supported types are:
|
||||
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value. The metric name is read from the `<context>`.
|
||||
CSV line must have at least a single metric field.
|
||||
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value, which must be integer or floating-point number.
|
||||
The metric name is read from the `<context>`. CSV line must have at least a single metric field. Multiple metric fields per CSV line are OK.
|
||||
* `label` - the corresponding CSV column at `<column_pos>` contains label value. The label name is read from the `<context>`.
|
||||
CSV line may have arbitrary number of label fields. All these fields are attached to all the configured metrics.
|
||||
CSV line may have arbitrary number of label fields. All these labels are attached to all the configured metrics.
|
||||
* `time` - the corresponding CSV column at `<column_pos>` contains metric time. CSV line may contain either one or zero columns with time.
|
||||
If CSV line has no time, then the current time is used. The time is applied to all the configured metrics.
|
||||
The format of the time is configured via `<context>`. Supported time formats are:
|
||||
@@ -454,7 +464,7 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
|
||||
* `rfc3339` - timestamp in [RFC3339](https://tools.ietf.org/html/rfc3339) format, i.e. `2006-01-02T15:04:05Z`.
|
||||
* `custom:<layout>` - custom layout for the timestamp. The `<layout>` may contain arbitrary time layout according to [time.Parse rules in Go](https://golang.org/pkg/time/#Parse).
|
||||
|
||||
Each request to `/api/v1/import/csv` can contain arbitrary number of CSV lines.
|
||||
Each request to `/api/v1/import/csv` may contain arbitrary number of CSV lines.
|
||||
|
||||
Example for importing CSV data via `/api/v1/import/csv`:
|
||||
|
||||
@@ -489,6 +499,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h
|
||||
* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
|
||||
* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
|
||||
* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
|
||||
* [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats)
|
||||
|
||||
These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
|
||||
|
||||
@@ -555,6 +566,13 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
|
||||
```
|
||||
|
||||
### Start with docker-compose
|
||||
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
|
||||
@@ -599,11 +617,13 @@ Steps for restoring from a snapshot:
|
||||
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
|
||||
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
|
||||
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
|
||||
the deleted time series isn't freed instantly - it is freed during subsequent [background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
|
||||
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
|
||||
before actually deleting the metrics.
|
||||
|
||||
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
|
||||
|
||||
The delete API is intended mainly for the following cases:
|
||||
|
||||
* One-off deleting of accidentally written invalid (or undesired) time series.
|
||||
@@ -611,10 +631,11 @@ The delete API is intended mainly for the following cases:
|
||||
|
||||
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
|
||||
|
||||
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
|
||||
* Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
|
||||
This can be done with relabeling in [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
|
||||
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
|
||||
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
|
||||
time series occupy disk space until the next merge operation, which can never occur.
|
||||
* Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
|
||||
time series occupy disk space until the next merge operation, which can never occur when deleting too old data.
|
||||
|
||||
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
|
||||
|
||||
@@ -836,6 +857,7 @@ Consider setting the following command-line flags:
|
||||
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
|
||||
* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
|
||||
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
|
||||
* `-search.resetCacheAuthKey` for protecting `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
|
||||
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
|
||||
@@ -905,9 +927,22 @@ The most interesting metrics are:
|
||||
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
|
||||
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
|
||||
|
||||
If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag
|
||||
to value close to the maximum interval between samples.
|
||||
|
||||
* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag.
|
||||
It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes
|
||||
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
|
||||
|
||||
* Metrics and labels leading to high cardinality or high churn rate can be determined at `/api/v1/status/tsdb` page.
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
|
||||
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
|
||||
while `topN` equals to 10.
|
||||
|
||||
|
||||
### Backfilling
|
||||
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time.
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
|
||||
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
|
||||
|
||||
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
|
||||
@@ -946,7 +981,8 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
|
||||
See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
|
||||
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Ansible role for installing single-node VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Ansible role for installing cluster VictoriaMetrics](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
|
||||
|
||||
## Third-party contributions
|
||||
|
||||
|
||||
@@ -131,12 +131,21 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
|
||||
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discovery.
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
|
||||
* `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
|
||||
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
|
||||
* `ec2_sd_configs` - for scraping targets in Amazon EC2.
|
||||
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
|
||||
`vmagent` doesn't support `role_arn` config param yet.
|
||||
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
|
||||
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
|
||||
`vmagent` provides the following additional functionality for `gce_sd_config`:
|
||||
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
|
||||
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
|
||||
* if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
|
||||
* `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
|
||||
|
||||
The following service discovery mechanisms will be added to `vmagent` soon:
|
||||
|
||||
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
|
||||
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
|
||||
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
|
||||
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
|
||||
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
|
||||
|
||||
@@ -182,7 +191,7 @@ Read more about relabeling in the following articles:
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. This page also exports information on improperly configured scrape configs.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -190,11 +199,14 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
|
||||
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
|
||||
since `vmagent` establishes at least a single TCP connection per each target.
|
||||
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow big when remote storage is unvailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
|
||||
|
||||
|
||||
@@ -205,7 +217,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent` from the root folder of the repository.
|
||||
It builds `vmagent` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -220,3 +232,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmagent
|
||||
```
|
||||
|
||||
@@ -233,8 +233,7 @@ again:
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
// There is no need in calling DoTimeout, since the timeout is set in c.hc.ReadTimeout.
|
||||
err := c.hc.Do(req, resp)
|
||||
err := doRequestWithPossibleRetry(c.hc, req, resp)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
@@ -267,3 +266,16 @@ again:
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
fasthttp.ReleaseRequest(req)
|
||||
}
|
||||
|
||||
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
|
||||
// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
|
||||
err := hc.Do(req, resp)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if err != fasthttp.ErrConnectionClosed {
|
||||
return err
|
||||
}
|
||||
// Retry request if the server closed the keep-alive connection during the first attempt.
|
||||
return hc.Do(req, resp)
|
||||
}
|
||||
|
||||
78
app/vmalert/Makefile
Normal file
78
app/vmalert/Makefile
Normal file
@@ -0,0 +1,78 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
vmalert:
|
||||
APP_NAME=vmalert $(MAKE) app-local
|
||||
|
||||
vmalert-race:
|
||||
APP_NAME=vmalert RACE=-race $(MAKE) app-local
|
||||
|
||||
vmalert-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker
|
||||
|
||||
vmalert-pure-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-pure
|
||||
|
||||
vmalert-amd64-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-amd64
|
||||
|
||||
vmalert-arm-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-arm
|
||||
|
||||
vmalert-arm64-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-arm64
|
||||
|
||||
vmalert-ppc64le-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-ppc64le
|
||||
|
||||
vmalert-386-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-386
|
||||
|
||||
package-vmalert:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker
|
||||
|
||||
package-vmalert-pure:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-pure
|
||||
|
||||
package-vmalert-amd64:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-amd64
|
||||
|
||||
package-vmalert-arm:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-arm
|
||||
|
||||
package-vmalert-arm64:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-arm64
|
||||
|
||||
package-vmalert-ppc64le:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-ppc64le
|
||||
|
||||
package-vmalert-386:
|
||||
APP_NAME=vmalert $(MAKE) package-via-docker-386
|
||||
|
||||
publish-vmalert:
|
||||
APP_NAME=vmalert $(MAKE) publish-via-docker
|
||||
|
||||
test-vmalert:
|
||||
go test -race -cover ./app/vmalert
|
||||
|
||||
run-vmalert: vmalert
|
||||
./bin/vmalert -rule=app/vmalert/testdata/rules0-good.rules \
|
||||
-datasource.url=http://localhost:8428 -notifier.url=http://localhost:9093 \
|
||||
-evaluationInterval=3s
|
||||
|
||||
vmalert-amd64:
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-amd64 ./app/vmalert
|
||||
|
||||
vmalert-arm:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm ./app/vmalert
|
||||
|
||||
vmalert-arm64:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm64 ./app/vmalert
|
||||
|
||||
vmalert-ppc64le:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-ppc64le ./app/vmalert
|
||||
|
||||
vmalert-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-386 ./app/vmalert
|
||||
|
||||
vmalert-pure:
|
||||
APP_NAME=vmalert $(MAKE) app-local-pure
|
||||
@@ -1,41 +1,95 @@
|
||||
## VM Alert
|
||||
|
||||
#### Abstract
|
||||
The application which accepts the alert rules, executes them on given source, sends(fires) an alert to(in) alert management system
|
||||
`vmalert` executes a list of given MetricsQL expressions (rules) and
|
||||
sends alerts to [Alert Manager](https://github.com/prometheus/alertmanager).
|
||||
|
||||
### Components
|
||||
NOTE: `vmalert` is in early alpha and wasn't tested in production systems yet.
|
||||
|
||||
#### Alert Config Reader
|
||||
It accepts yaml config as input parameter in Prometheus format, parses it into Go struct.
|
||||
### Features:
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
|
||||
* VictoriaMetrics [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
|
||||
expressions validation;
|
||||
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
|
||||
* Lightweight without extra dependencies.
|
||||
|
||||
#### Source Caller
|
||||
Create own watchdog for every alert group (goroutines), which executes alert query on given source and issues an alert if source returns non-empty result.
|
||||
Source can be any service which supports PromQL (MetricsQL).
|
||||
### TODO:
|
||||
* Persist alerts state as timeseries in TSDB. Currently, alerts state is stored
|
||||
in process memory only and will be lost on restart;
|
||||
* Configuration hot reload.
|
||||
|
||||
#### Alert Management System Provider
|
||||
Send positive alert to alert management system, provides interface for every concrete implementation.
|
||||
Should be ingratiated with Prometheus alertmanager.
|
||||
### QuickStart
|
||||
|
||||
open questions:
|
||||
- do we really need alert group or can just run every alert in own goroutine?
|
||||
To build `vmalert` from sources:
|
||||
```
|
||||
git clone https://github.com/VictoriaMetrics/VictoriaMetrics
|
||||
cd VictoriaMetrics
|
||||
make vmalert
|
||||
```
|
||||
The built binary will be placed into the `VictoriaMetrics/bin` folder.
|
||||
|
||||
#### Web Server
|
||||
Expose metrics
|
||||
To start using `vmalert` you will need the following things:
|
||||
* list of alert rules - PromQL/MetricsQL expressions to execute;
|
||||
* datasource address - reachable VictoriaMetrics instance for rules execution;
|
||||
* notifier address - reachable Alertmanager instance for processing,
|
||||
aggregating alerts and sending notifications.
|
||||
|
||||
open questions:
|
||||
- should the tool provide API or UI for managing alerting rules? Where to store config updated via the API or UI?
|
||||
- should the tool provide “alerting rules validation mode” for validating and debugging alerting rules? This mode is useful when creating and debugging alerting rules.
|
||||
Then configure `vmalert` accordingly:
|
||||
```
|
||||
./bin/vmalert -rule=alert.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093
|
||||
```
|
||||
|
||||
#### Requirements:
|
||||
- Stateless
|
||||
- Avoid external dependencies if possible
|
||||
- Reuse existing code from VictoriaMetrics repo
|
||||
- Makefile rules for common tasks – see Makefiles for other apps in the app/ dir
|
||||
- Every package should be covered by tests
|
||||
- Dockerfile
|
||||
- Graceful shutdown
|
||||
- Helm template
|
||||
- Application uses command line flags for configuration
|
||||
Example for `.rules` file may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/testdata/rules0-good.rules)
|
||||
|
||||
`vmalert` runs evaluation for every group in a separate goroutine.
|
||||
Rules in a group are evaluated one-by-one sequentially.
|
||||
|
||||
<img alt="VM Alert" src="vmalert.png">
|
||||
`vmalert` also runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
|
||||
* `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status` - get alert status by ID.
|
||||
Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/metrics` - application metrics.
|
||||
|
||||
### Configuration
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
```
|
||||
Usage of vmalert:
|
||||
-datasource.url string
|
||||
Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password to use for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username to use for -datasource.url
|
||||
-evaluationInterval duration
|
||||
How often to evaluate the rules. Default 1m (default 1m0s)
|
||||
-external.url string
|
||||
External URL is used as alert's source for sent alerts to the notifier
|
||||
-httpListenAddr string
|
||||
Address to listen for http connections (default ":8880")
|
||||
-notifier.url string
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
-remotewrite.url string
|
||||
Optional URL to remote-write compatible storage where to write timeseriesbased on active alerts. E.g. http://127.0.0.1:8428
|
||||
-rule value
|
||||
Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.
|
||||
-rule.validateTemplates
|
||||
Indicates to validate annotation and label templates (default true)
|
||||
```
|
||||
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
### Contributing
|
||||
|
||||
`vmalert` is mostly designed and built by the VictoriaMetrics community.
|
||||
Feel free to share your experience and ideas for improving this
|
||||
software. Please keep simplicity as the main priority.
|
||||
@@ -1,132 +0,0 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// Alert the triggered alert
|
||||
type Alert struct {
|
||||
Group string
|
||||
Name string
|
||||
Labels []datasource.Label
|
||||
Annotations map[string]string
|
||||
|
||||
Start time.Time
|
||||
End time.Time
|
||||
Value float64
|
||||
}
|
||||
|
||||
// alertTplData is the value every annotation template is executed against.
type alertTplData struct {
	Labels, ExternalLabels map[string]string
	Value                  float64
}

// tplHeader is prepended to each annotation template so that the fields of
// alertTplData are reachable as $value, $labels and $externalLabels.
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $externalLabels := .ExternalLabels }}`
|
||||
|
||||
// AlertsFromMetrics converts metrics to alerts by alert Rule
|
||||
func AlertsFromMetrics(metrics []datasource.Metric, group string, rule Rule, start, end time.Time) []Alert {
|
||||
alerts := make([]Alert, 0, len(metrics))
|
||||
var err error
|
||||
for i, m := range metrics {
|
||||
a := Alert{
|
||||
Group: group,
|
||||
Name: rule.Name,
|
||||
Start: start,
|
||||
End: end,
|
||||
Value: m.Value,
|
||||
}
|
||||
tplData := alertTplData{Value: m.Value, ExternalLabels: make(map[string]string)}
|
||||
tplData.Labels, a.Labels = mergeLabels(metrics[i].Labels, rule.Labels)
|
||||
a.Annotations, err = templateAnnotations(rule.Annotations, tplHeader, tplData)
|
||||
if err != nil {
|
||||
logger.Errorf("%s", err)
|
||||
}
|
||||
alerts = append(alerts, a)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func mergeLabels(ml []datasource.Label, rl map[string]string) (map[string]string, []datasource.Label) {
|
||||
set := make(map[string]string, len(ml)+len(rl))
|
||||
sl := append([]datasource.Label(nil), ml...)
|
||||
for _, i := range ml {
|
||||
set[i.Name] = i.Value
|
||||
}
|
||||
for name, value := range rl {
|
||||
if _, ok := set[name]; ok {
|
||||
continue
|
||||
}
|
||||
set[name] = value
|
||||
sl = append(sl, datasource.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return set, sl
|
||||
}
|
||||
|
||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
||||
var builder strings.Builder
|
||||
var buf bytes.Buffer
|
||||
eg := errGroup{}
|
||||
r := make(map[string]string, len(annotations))
|
||||
for key, text := range annotations {
|
||||
r[key] = text
|
||||
buf.Reset()
|
||||
builder.Reset()
|
||||
builder.Grow(len(header) + len(text))
|
||||
builder.WriteString(header)
|
||||
builder.WriteString(text)
|
||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
||||
eg.errs = append(eg.errs, fmt.Sprintf("key %s, template %s:%s", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
}
|
||||
return r, eg.err()
|
||||
}
|
||||
|
||||
// ValidateAnnotations validate annotations for possible template error, uses empty data for template population
|
||||
func ValidateAnnotations(annotations map[string]string) error {
|
||||
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
|
||||
Labels: map[string]string{},
|
||||
ExternalLabels: map[string]string{},
|
||||
Value: 0,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
||||
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing annotation:%w", err)
|
||||
}
|
||||
if err = tpl.Execute(dst, data); err != nil {
|
||||
return fmt.Errorf("error evaluating annotation template:%w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// errGroup accumulates error messages and exposes them as a single error.
type errGroup struct {
	errs []string
}

// err returns eg as an error if it holds at least one message. It returns
// a nil error for a nil receiver or an empty group.
func (eg *errGroup) err() error {
	if eg != nil && len(eg.errs) > 0 {
		return eg
	}
	return nil
}

// Error implements the error interface: the collected messages are joined
// with newlines and prefixed with "errors:".
func (eg *errGroup) Error() string {
	joined := strings.Join(eg.errs, "\n")
	return fmt.Sprintf("errors:%s", joined)
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
func TestAlertsFromMetrics(t *testing.T) {
|
||||
now := time.Now()
|
||||
metrics := []datasource.Metric{
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "__name__", Value: "foo"},
|
||||
{Name: "label", Value: "value"},
|
||||
},
|
||||
Timestamp: 10,
|
||||
Value: 20,
|
||||
},
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "__name__", Value: "bar"},
|
||||
{Name: "label", Value: "value"},
|
||||
},
|
||||
Timestamp: 10,
|
||||
Value: 30,
|
||||
},
|
||||
}
|
||||
rule := Rule{
|
||||
Name: "alertname",
|
||||
Expr: "up==0",
|
||||
Labels: map[string]string{
|
||||
"label2": "value",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"tpl": "{{$value}} {{ $labels.label}}",
|
||||
},
|
||||
}
|
||||
alerts := AlertsFromMetrics(metrics, "group", rule, now, now)
|
||||
if len(alerts) != 2 {
|
||||
t.Fatalf("expecting 2 alerts got %d", len(alerts))
|
||||
}
|
||||
|
||||
f := func(got, exp Alert) {
|
||||
t.Helper()
|
||||
if got.Group != exp.Group ||
|
||||
got.Value != exp.Value ||
|
||||
got.End != exp.End ||
|
||||
got.Name != exp.Name ||
|
||||
got.Start != exp.Start {
|
||||
t.Errorf("alerts are not equal: \nwant %#v \ngot %#v", exp, got)
|
||||
}
|
||||
sort.Slice(got.Labels, func(i, j int) bool {
|
||||
return got.Labels[i].Name < got.Labels[j].Name
|
||||
})
|
||||
sort.Slice(exp.Labels, func(i, j int) bool {
|
||||
return got.Labels[i].Name < got.Labels[j].Name
|
||||
})
|
||||
if !reflect.DeepEqual(got.Labels, exp.Labels) {
|
||||
t.Errorf("alerts labels are not equal: want %+v got %+v", exp.Labels, got.Labels)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Annotations, exp.Annotations) {
|
||||
t.Errorf("alerts annotations are not equal: want %+v got %+v", exp.Annotations, got.Annotations)
|
||||
}
|
||||
}
|
||||
f(alerts[0], Alert{
|
||||
Group: "group",
|
||||
Name: "alertname",
|
||||
Labels: []datasource.Label{
|
||||
{Name: "__name__", Value: "foo"},
|
||||
{Name: "label", Value: "value"},
|
||||
{Name: "label2", Value: "value"},
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"tpl": "20 value",
|
||||
},
|
||||
Start: now,
|
||||
End: now,
|
||||
Value: 20,
|
||||
})
|
||||
f(alerts[1], Alert{
|
||||
Group: "group",
|
||||
Name: "alertname",
|
||||
Labels: []datasource.Label{
|
||||
{Name: "__name__", Value: "bar"},
|
||||
{Name: "label", Value: "value"},
|
||||
{Name: "label2", Value: "value"},
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"tpl": "30 value",
|
||||
},
|
||||
Start: now,
|
||||
End: now,
|
||||
Value: 30,
|
||||
})
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
|
||||
)
|
||||
|
||||
// Rule is basic alert entity
|
||||
type Rule struct {
|
||||
Name string `yaml:"alert"`
|
||||
Expr string `yaml:"expr"`
|
||||
For time.Duration `yaml:"for"`
|
||||
Labels map[string]string `yaml:"labels"`
|
||||
Annotations map[string]string `yaml:"annotations"`
|
||||
}
|
||||
|
||||
// Validate validates rule
|
||||
func (r Rule) Validate() error {
|
||||
if r.Name == "" {
|
||||
return errors.New("rule name can not be empty")
|
||||
}
|
||||
if r.Expr == "" {
|
||||
return fmt.Errorf("rule %s expression can not be empty", r.Name)
|
||||
}
|
||||
if _, err := metricsql.Parse(r.Expr); err != nil {
|
||||
return fmt.Errorf("rule %s invalid expression: %w", r.Name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Group grouping array of alert
|
||||
type Group struct {
|
||||
Name string
|
||||
Rules []Rule
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package common
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRule_Validate(t *testing.T) {
|
||||
if err := (Rule{}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty name error")
|
||||
}
|
||||
if err := (Rule{Name: "alert"}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty expr error")
|
||||
}
|
||||
if err := (Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
|
||||
t.Errorf("exptected invalid expr error")
|
||||
}
|
||||
if err := (Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Errorf("exptected valid rule got %s", err)
|
||||
}
|
||||
}
|
||||
@@ -1,17 +1,17 @@
|
||||
package config
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
"gopkg.in/yaml.v2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
// Parse parses rule configs from given file patterns
|
||||
func Parse(pathPatterns []string, validateAnnotations bool) ([]common.Group, error) {
|
||||
func Parse(pathPatterns []string, validateAnnotations bool) ([]Group, error) {
|
||||
var fp []string
|
||||
for _, pattern := range pathPatterns {
|
||||
matches, err := filepath.Glob(pattern)
|
||||
@@ -20,7 +20,7 @@ func Parse(pathPatterns []string, validateAnnotations bool) ([]common.Group, err
|
||||
}
|
||||
fp = append(fp, matches...)
|
||||
}
|
||||
var groups []common.Group
|
||||
var groups []Group
|
||||
for _, file := range fp {
|
||||
groupsNames := map[string]struct{}{}
|
||||
gr, err := parseFile(file)
|
||||
@@ -36,11 +36,17 @@ func Parse(pathPatterns []string, validateAnnotations bool) ([]common.Group, err
|
||||
if err = rule.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("invalid rule filepath:%s, group %s:%w", file, group.Name, err)
|
||||
}
|
||||
// TODO: this init looks weird here
|
||||
rule.alerts = make(map[uint64]*notifier.Alert)
|
||||
if validateAnnotations {
|
||||
if err = common.ValidateAnnotations(rule.Annotations); err != nil {
|
||||
return nil, fmt.Errorf("invalida annotations filepath:%s, group %s:%w", file, group.Name, err)
|
||||
if err = notifier.ValidateTemplates(rule.Annotations); err != nil {
|
||||
return nil, fmt.Errorf("invalid annotations filepath:%s, group %s:%w", file, group.Name, err)
|
||||
}
|
||||
if err = notifier.ValidateTemplates(rule.Labels); err != nil {
|
||||
return nil, fmt.Errorf("invalid labels filepath:%s, group %s:%w", file, group.Name, err)
|
||||
}
|
||||
}
|
||||
rule.group = &group
|
||||
}
|
||||
}
|
||||
groups = append(groups, gr...)
|
||||
@@ -51,13 +57,13 @@ func Parse(pathPatterns []string, validateAnnotations bool) ([]common.Group, err
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func parseFile(path string) ([]common.Group, error) {
|
||||
func parseFile(path string) ([]Group, error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file: %w", err)
|
||||
}
|
||||
g := struct {
|
||||
Groups []common.Group `yaml:"groups"`
|
||||
Groups []Group `yaml:"groups"`
|
||||
}{}
|
||||
err = yaml.Unmarshal(data, &g)
|
||||
return g.Groups, err
|
||||
@@ -1,16 +1,16 @@
|
||||
package config
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||
common.InitTemplateFunc(u)
|
||||
notifier.InitTemplateFunc(u)
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ func TestParseBad(t *testing.T) {
|
||||
if _, err := Parse([]string{"testdata/dir/rules1-bad.rules"}, true); err == nil {
|
||||
t.Errorf("expected same group error")
|
||||
}
|
||||
if _, err := Parse([]string{"testdata/dir/rules2-bad.rules"}, true); err == nil {
|
||||
t.Errorf("expected template label error")
|
||||
}
|
||||
if _, err := Parse([]string{"testdata/*.yaml"}, true); err == nil {
|
||||
t.Errorf("expected empty group")
|
||||
}
|
||||
@@ -1,5 +1,14 @@
|
||||
package datasource
|
||||
|
||||
import "context"
|
||||
|
||||
// Querier interface wraps Query method which
|
||||
// executes given query and returns list of Metrics
|
||||
// as result
|
||||
type Querier interface {
|
||||
Query(ctx context.Context, query string) ([]Metric, error)
|
||||
}
|
||||
|
||||
// Metric is the basic entity which should be return by datasource
|
||||
// It represents single data point with full list of labels
|
||||
type Metric struct {
|
||||
|
||||
8
app/vmalert/deployment/Dockerfile
Normal file
8
app/vmalert/deployment/Dockerfile
Normal file
@@ -0,0 +1,8 @@
|
||||
# base_image is supplied at build time and selects the image to build on.
ARG base_image
FROM $base_image

# 8880 is the default port of vmalert's -httpListenAddr flag.
EXPOSE 8880

ENTRYPOINT ["/vmalert-prod"]
# src_binary is supplied at build time and points to the compiled binary.
ARG src_binary
# Copy to an absolute path so the binary location always matches ENTRYPOINT,
# regardless of the WORKDIR configured by the base image.
COPY $src_binary /vmalert-prod
|
||||
@@ -8,36 +8,42 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/provider"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rulePath = flagutil.NewArray("rule", `Path to file with alert rules, accepts patterns.
|
||||
Flag can be specified multiple time.
|
||||
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule /path/to/file. Path to single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Paths to all yaml files in relative dir folder and absolute yaml file in a root.`)
|
||||
validateAlertAnnotations = flag.Bool("rule.validateAnnotations", true, "Indicates to validate annotation templates")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username to use for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password to use for -datasource.url")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", 1*time.Minute, "How often to evaluate the rules. Default 1m")
|
||||
providerURL = flag.String("provider.url", "", "Prometheus alertmanager url. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
externalURL = flag.String("external.url", "", "Reachable external url. URL is used to generate sharable alert url and in annotation templates")
|
||||
-rule /path/to/file. Path to a single file with alerting rules
|
||||
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
|
||||
absolute path to all .yaml files in root.`)
|
||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Indicates to validate annotation and label templates")
|
||||
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
|
||||
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428")
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username to use for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password to use for -datasource.url")
|
||||
remoteWriteURL = flag.String("remotewrite.url", "", "Optional URL to remote-write compatible storage where to write timeseries"+
|
||||
"based on active alerts. E.g. http://127.0.0.1:8428")
|
||||
evaluationInterval = flag.Duration("evaluationInterval", 1*time.Minute, "How often to evaluate the rules. Default 1m")
|
||||
notifierURL = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
)
|
||||
|
||||
// TODO: hot configuration reload
|
||||
// TODO: alerts state persistence
|
||||
func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
@@ -48,76 +54,129 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Fatalf("can not get external url:%s ", err)
|
||||
}
|
||||
common.InitTemplateFunc(eu)
|
||||
notifier.InitTemplateFunc(eu)
|
||||
|
||||
logger.Infof("reading alert rules configuration file from %s", strings.Join(*rulePath, ";"))
|
||||
alertGroups, err := config.Parse(*rulePath, *validateAlertAnnotations)
|
||||
groups, err := Parse(*rulePath, *validateTemplates)
|
||||
if err != nil {
|
||||
logger.Fatalf("Cannot parse configuration file: %s", err)
|
||||
logger.Fatalf("cannot parse configuration file: %s", err)
|
||||
}
|
||||
|
||||
w := &watchdog{
|
||||
storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{}),
|
||||
alertProvider: provider.NewAlertManager(*providerURL, func(group, name string) string {
|
||||
return strings.Replace(fmt.Sprintf("%s/%s/%s/status", eu, group, name), "//", "/", -1)
|
||||
alertProvider: notifier.NewAlertManager(*notifierURL, func(group, name string) string {
|
||||
return fmt.Sprintf("%s/api/v1/%s/%s/status", eu, group, name)
|
||||
}, &http.Client{}),
|
||||
}
|
||||
for id := range alertGroups {
|
||||
go func(group common.Group) {
|
||||
w.run(ctx, group, *evaluationInterval)
|
||||
}(alertGroups[id])
|
||||
}
|
||||
go func() {
|
||||
httpserver.Serve(*httpListenAddr, func(w http.ResponseWriter, r *http.Request) bool {
|
||||
panic("not implemented")
|
||||
|
||||
if *remoteWriteURL != "" {
|
||||
c, err := remotewrite.NewClient(ctx, remotewrite.Config{
|
||||
Addr: *remoteWriteURL,
|
||||
FlushInterval: *evaluationInterval,
|
||||
})
|
||||
}()
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init remotewrite client: %s", err)
|
||||
}
|
||||
w.rw = c
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
for i := range groups {
|
||||
wg.Add(1)
|
||||
go func(group Group) {
|
||||
w.run(ctx, group, *evaluationInterval)
|
||||
wg.Done()
|
||||
}(groups[i])
|
||||
}
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, (&requestHandler{groups: groups}).handler)
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
if err := httpserver.Stop(*httpListenAddr); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
cancel()
|
||||
w.stop()
|
||||
if w.rw != nil {
|
||||
err := w.rw.Close()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot stop the remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
type watchdog struct {
|
||||
storage *datasource.VMStorage
|
||||
alertProvider provider.AlertProvider
|
||||
alertProvider notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
}
|
||||
|
||||
func (w *watchdog) run(ctx context.Context, a common.Group, evaluationInterval time.Duration) {
|
||||
logger.Infof("watchdog for %s has been run", a.Name)
|
||||
var (
|
||||
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
|
||||
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
|
||||
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
|
||||
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
|
||||
|
||||
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (w *watchdog) run(ctx context.Context, group Group, evaluationInterval time.Duration) {
|
||||
logger.Infof("watchdog for %s has been started", group.Name)
|
||||
t := time.NewTicker(evaluationInterval)
|
||||
var metrics []datasource.Metric
|
||||
var err error
|
||||
var alerts []common.Alert
|
||||
defer t.Stop()
|
||||
for {
|
||||
|
||||
select {
|
||||
case <-t.C:
|
||||
start := time.Now()
|
||||
for _, r := range a.Rules {
|
||||
if metrics, err = w.storage.Query(ctx, r.Expr); err != nil {
|
||||
logger.Errorf("error reading metrics %s", err)
|
||||
continue
|
||||
}
|
||||
// todo check for and calculate alert states
|
||||
if len(metrics) < 1 {
|
||||
continue
|
||||
}
|
||||
// todo define alert end time
|
||||
alerts = common.AlertsFromMetrics(metrics, a.Name, r, start, time.Time{})
|
||||
// todo save to storage
|
||||
if err := w.alertProvider.Send(alerts); err != nil {
|
||||
logger.Errorf("error sending alerts %s", err)
|
||||
continue
|
||||
}
|
||||
// todo is alert still active/pending?
|
||||
}
|
||||
iterationTotal.Inc()
|
||||
iterationStart := time.Now()
|
||||
for _, rule := range group.Rules {
|
||||
execTotal.Inc()
|
||||
|
||||
execStart := time.Now()
|
||||
err := rule.Exec(ctx, w.storage)
|
||||
execDuration.UpdateDuration(execStart)
|
||||
|
||||
if err != nil {
|
||||
execErrors.Inc()
|
||||
logger.Errorf("failed to execute rule %q.%q: %s", group.Name, rule.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var alertsToSend []notifier.Alert
|
||||
for _, a := range rule.alerts {
|
||||
if a.State != notifier.StatePending {
|
||||
alertsToSend = append(alertsToSend, *a)
|
||||
}
|
||||
if a.State == notifier.StateInactive || w.rw == nil {
|
||||
continue
|
||||
}
|
||||
tss := rule.AlertToTimeSeries(a, execStart)
|
||||
for _, ts := range tss {
|
||||
remoteWriteSent.Inc()
|
||||
if err := w.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
logger.Errorf("failed to push timeseries to remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
alertsSent.Add(len(alertsToSend))
|
||||
if err := w.alertProvider.Send(alertsToSend); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
logger.Errorf("failed to send alert for rule %q.%q: %s", group.Name, rule.Name, err)
|
||||
}
|
||||
}
|
||||
iterationDuration.UpdateDuration(iterationStart)
|
||||
case <-ctx.Done():
|
||||
logger.Infof("%s receive stop signal", a.Name)
|
||||
logger.Infof("%s received stop signal", group.Name)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -142,14 +201,10 @@ func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL
|
||||
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
||||
}
|
||||
|
||||
func (w *watchdog) stop() {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func checkFlags() {
|
||||
if *providerURL == "" {
|
||||
if *notifierURL == "" {
|
||||
flag.PrintDefaults()
|
||||
logger.Fatalf("provider.url is empty")
|
||||
logger.Fatalf("notifier.url is empty")
|
||||
}
|
||||
if *datasourceURL == "" {
|
||||
flag.PrintDefaults()
|
||||
|
||||
105
app/vmalert/notifier/alert.go
Normal file
105
app/vmalert/notifier/alert.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Alert the triggered alert
|
||||
// TODO: Looks like alert name isn't unique
|
||||
type Alert struct {
|
||||
Group string
|
||||
Name string
|
||||
Labels map[string]string
|
||||
Annotations map[string]string
|
||||
State AlertState
|
||||
|
||||
Start time.Time
|
||||
End time.Time
|
||||
Value float64
|
||||
ID uint64
|
||||
}
|
||||
|
||||
// AlertState enumerates the states an Alert can be in.
type AlertState int

const (
	// StateInactive marks an alert that is neither pending nor firing.
	StateInactive AlertState = iota
	// StatePending marks an alert that has been active for less than the
	// configured threshold duration.
	StatePending
	// StateFiring marks an alert that has been active for longer than the
	// configured threshold duration.
	StateFiring
)

// String returns "firing" or "pending" for the matching states and
// "inactive" for every other value.
func (as AlertState) String() string {
	if as == StateFiring {
		return "firing"
	}
	if as == StatePending {
		return "pending"
	}
	return "inactive"
}
|
||||
|
||||
// alertTplData is the value every alert template is executed against.
type alertTplData struct {
	// Labels is exposed to templates as $labels.
	Labels map[string]string
	// Value is exposed to templates as $value.
	Value float64
}

// tplHeader is prepended to each template so that the fields of
// alertTplData are reachable as $value and $labels.
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}`
|
||||
|
||||
// ExecTemplate executes the Alert template for give
|
||||
// map of annotations.
|
||||
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
|
||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels}
|
||||
return templateAnnotations(annotations, tplHeader, tplData)
|
||||
}
|
||||
|
||||
// ValidateTemplates validate annotations for possible template error, uses empty data for template population
|
||||
func ValidateTemplates(annotations map[string]string) error {
|
||||
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
|
||||
Labels: map[string]string{},
|
||||
Value: 0,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
||||
var builder strings.Builder
|
||||
var buf bytes.Buffer
|
||||
eg := errGroup{}
|
||||
r := make(map[string]string, len(annotations))
|
||||
for key, text := range annotations {
|
||||
r[key] = text
|
||||
buf.Reset()
|
||||
builder.Reset()
|
||||
builder.Grow(len(header) + len(text))
|
||||
builder.WriteString(header)
|
||||
builder.WriteString(text)
|
||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
||||
eg.errs = append(eg.errs, fmt.Sprintf("key %s, template %s:%s", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
}
|
||||
return r, eg.err()
|
||||
}
|
||||
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
||||
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing annotation:%w", err)
|
||||
}
|
||||
if err = tpl.Execute(dst, data); err != nil {
|
||||
return fmt.Errorf("error evaluating annotation template:%w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
65
app/vmalert/notifier/alert_test.go
Normal file
65
app/vmalert/notifier/alert_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAlert_ExecTemplate(t *testing.T) {
|
||||
testCases := []struct {
|
||||
alert *Alert
|
||||
annotations map[string]string
|
||||
expTpl map[string]string
|
||||
}{
|
||||
{
|
||||
alert: &Alert{},
|
||||
annotations: map[string]string{},
|
||||
expTpl: map[string]string{},
|
||||
},
|
||||
{
|
||||
alert: &Alert{
|
||||
Value: 1e4,
|
||||
Labels: map[string]string{
|
||||
"instance": "localhost",
|
||||
},
|
||||
},
|
||||
annotations: map[string]string{},
|
||||
expTpl: map[string]string{},
|
||||
},
|
||||
{
|
||||
alert: &Alert{
|
||||
Value: 1e4,
|
||||
Labels: map[string]string{
|
||||
"job": "staging",
|
||||
"instance": "localhost",
|
||||
},
|
||||
},
|
||||
annotations: map[string]string{
|
||||
"summary": "Too high connection number for {{$labels.instance}} for job {{$labels.job}}",
|
||||
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"summary": "Too high connection number for localhost for job staging",
|
||||
"description": "It is 10000 connections for localhost",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
tpl, err := tc.alert.ExecTemplate(tc.annotations)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(tpl) != len(tc.expTpl) {
|
||||
t.Fatalf("expected %d elements; got %d", len(tc.expTpl), len(tpl))
|
||||
}
|
||||
for k := range tc.expTpl {
|
||||
got, exp := tpl[k], tc.expTpl[k]
|
||||
if got != exp {
|
||||
t.Fatalf("expected %q=%q; got %q=%q", k, exp, k, got)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
51
app/vmalert/notifier/alertmanager.go
Normal file
51
app/vmalert/notifier/alertmanager.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// AlertManager represents integration provider with Prometheus alert manager
|
||||
// https://github.com/prometheus/alertmanager
|
||||
type AlertManager struct {
|
||||
alertURL string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(alerts []Alert) error {
|
||||
b := &bytes.Buffer{}
|
||||
writeamRequest(b, alerts, am.argFunc)
|
||||
resp, err := am.client.Post(am.alertURL, "application/json", b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read response from %q: %s", am.alertURL, err)
|
||||
}
|
||||
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.alertURL, string(body))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AlertURLGenerator builds the URL of a single alert from its group name
// and its id.
type AlertURLGenerator func(group, id string) string

// alertManagerPath is appended to the Alertmanager base URL to form the
// address alerts are sent to.
const alertManagerPath = "/api/v2/alerts"
|
||||
|
||||
// NewAlertManager is a constructor for AlertManager
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
return &AlertManager{
|
||||
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,22 @@
|
||||
{% import (
|
||||
"strconv"
|
||||
"time"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
) %}
|
||||
{% stripspace %}
|
||||
|
||||
{% func amRequest(alerts []common.Alert, generatorURL func(string, string) string) %}
|
||||
{% func amRequest(alerts []Alert, generatorURL func(string, string) string) %}
|
||||
[
|
||||
{% for i, alert := range alerts %}
|
||||
{
|
||||
"startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
|
||||
"generatorURL": {%q= generatorURL(alert.Group, alert.Name) %},
|
||||
"generatorURL": {%q= generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)) %},
|
||||
{% if !alert.End.IsZero() %}
|
||||
"endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
|
||||
{% endif %}
|
||||
"labels": {
|
||||
"alertname":{%q= alert.Name %}
|
||||
{% for _,v := range alert.Labels %}
|
||||
,{%q= v.Name %}:{%q= v.Value %}
|
||||
{% for k,v := range alert.Labels %}
|
||||
,{%q= k %}:{%q= v %}
|
||||
{% endfor %}
|
||||
},
|
||||
"annotations": {
|
||||
131
app/vmalert/notifier/alertmanager_request.qtpl.go
Normal file
131
app/vmalert/notifier/alertmanager_request.qtpl.go
Normal file
@@ -0,0 +1,131 @@
|
||||
// Code generated by qtc from "alertmanager_request.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:1
|
||||
package notifier
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:1
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:7
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:7
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:7
|
||||
func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:7
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:9
|
||||
for i, alert := range alerts {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:9
|
||||
qw422016.N().S(`{"startsAt":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:11
|
||||
qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:11
|
||||
qw422016.N().S(`,"generatorURL":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:12
|
||||
qw422016.N().Q(generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)))
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:12
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:13
|
||||
if !alert.End.IsZero() {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:13
|
||||
qw422016.N().S(`"endsAt":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:14
|
||||
qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:14
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:15
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:15
|
||||
qw422016.N().S(`"labels": {"alertname":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:17
|
||||
qw422016.N().Q(alert.Name)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:18
|
||||
for k, v := range alert.Labels {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:18
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:19
|
||||
qw422016.N().Q(k)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:19
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:19
|
||||
qw422016.N().Q(v)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:20
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:20
|
||||
qw422016.N().S(`},"annotations": {`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:23
|
||||
c := len(alert.Annotations)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:24
|
||||
for k, v := range alert.Annotations {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:25
|
||||
c = c - 1
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
qw422016.N().Q(k)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
qw422016.N().Q(v)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
if c > 0 {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:26
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:27
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:27
|
||||
qw422016.N().S(`}}`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:30
|
||||
if i != len(alerts)-1 {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:30
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:30
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:31
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:31
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
}
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
func writeamRequest(qq422016 qtio422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
streamamRequest(qw422016, alerts, generatorURL)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
}
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
func amRequest(alerts []Alert, generatorURL func(string, string) string) string {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
writeamRequest(qb422016, alerts, generatorURL)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
return qs422016
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:33
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package provider
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
@@ -6,8 +6,6 @@ import (
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
)
|
||||
|
||||
func TestAlertManager_Send(t *testing.T) {
|
||||
@@ -16,7 +14,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
t.Errorf("should not be called")
|
||||
})
|
||||
c := -1
|
||||
mux.HandleFunc(alertsPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
c++
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("expected POST method got %s", r.Method)
|
||||
@@ -42,7 +40,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
if len(a) != 1 {
|
||||
t.Errorf("expected 1 alert in array got %d", len(a))
|
||||
}
|
||||
if a[0].GeneratorURL != "group0alert0" {
|
||||
if a[0].GeneratorURL != "group0" {
|
||||
t.Errorf("exptected alert0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
}
|
||||
if a[0].Labels["alertname"] != "alert0" {
|
||||
@@ -61,14 +59,14 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
am := NewAlertManager(srv.URL, func(group, name string) string {
|
||||
return group + name
|
||||
}, srv.Client())
|
||||
if err := am.Send([]common.Alert{{}, {}}); err == nil {
|
||||
if err := am.Send([]Alert{{}, {}}); err == nil {
|
||||
t.Error("expected connection error got nil")
|
||||
}
|
||||
if err := am.Send([]common.Alert{}); err == nil {
|
||||
if err := am.Send([]Alert{}); err == nil {
|
||||
t.Error("expected wrong http code error got nil")
|
||||
}
|
||||
if err := am.Send([]common.Alert{{
|
||||
Group: "group0",
|
||||
if err := am.Send([]Alert{{
|
||||
Group: "group",
|
||||
Name: "alert0",
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
6
app/vmalert/notifier/notifier.go
Normal file
6
app/vmalert/notifier/notifier.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package notifier
|
||||
|
||||
// Notifier is the common interface for alert manager providers.
type Notifier interface {
	// Send takes a batch of alerts and reports a failure
	// via the returned error.
	Send(alerts []Alert) error
}
|
||||
@@ -11,7 +11,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package common
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
21
app/vmalert/notifier/utils.go
Normal file
21
app/vmalert/notifier/utils.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// errGroup accumulates error messages so that several failures
// can be reported as a single error value.
type errGroup struct {
	errs []string
}

// err returns nil when the receiver is nil or holds no messages;
// otherwise it returns the group itself as an error.
func (eg *errGroup) err() error {
	if eg != nil && len(eg.errs) > 0 {
		return eg
	}
	return nil
}

// Error implements the error interface: the collected messages are
// joined with newlines and prefixed with "errors:".
func (eg *errGroup) Error() string {
	joined := strings.Join(eg.errs, "\n")
	return fmt.Sprintf("errors:%s", joined)
}
|
||||
@@ -1,131 +0,0 @@
|
||||
// Code generated by qtc from "alert_manager_request.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:1
|
||||
package provider
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:1
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
"time"
|
||||
)
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:7
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:7
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:7
|
||||
func streamamRequest(qw422016 *qt422016.Writer, alerts []common.Alert, generatorURL func(string, string) string) {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:7
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:9
|
||||
for i, alert := range alerts {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:9
|
||||
qw422016.N().S(`{"startsAt":`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:11
|
||||
qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:11
|
||||
qw422016.N().S(`,"generatorURL":`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:12
|
||||
qw422016.N().Q(generatorURL(alert.Group, alert.Name))
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:12
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:13
|
||||
if !alert.End.IsZero() {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:13
|
||||
qw422016.N().S(`"endsAt":`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:14
|
||||
qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:14
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:15
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:15
|
||||
qw422016.N().S(`"labels": {"alertname":`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:17
|
||||
qw422016.N().Q(alert.Name)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:18
|
||||
for _, v := range alert.Labels {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:18
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:19
|
||||
qw422016.N().Q(v.Name)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:19
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:19
|
||||
qw422016.N().Q(v.Value)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:20
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:20
|
||||
qw422016.N().S(`},"annotations": {`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:23
|
||||
c := len(alert.Annotations)
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:24
|
||||
for k, v := range alert.Annotations {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:25
|
||||
c = c - 1
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
qw422016.N().Q(k)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
qw422016.N().Q(v)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
if c > 0 {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:26
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:27
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:27
|
||||
qw422016.N().S(`}}`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:30
|
||||
if i != len(alerts)-1 {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:30
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:30
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:31
|
||||
}
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:31
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
}
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
func writeamRequest(qq422016 qtio422016.Writer, alerts []common.Alert, generatorURL func(string, string) string) {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
streamamRequest(qw422016, alerts, generatorURL)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
}
|
||||
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
func amRequest(alerts []common.Alert, generatorURL func(string, string) string) string {
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
writeamRequest(qb422016, alerts, generatorURL)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
return qs422016
|
||||
//line app/vmalert/provider/alert_manager_request.qtpl:33
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
package provider
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
const alertsPath = "/api/v2/alerts"
|
||||
|
||||
// AlertProvider is common interface for alert manager provider
|
||||
type AlertProvider interface {
|
||||
Send(alerts []common.Alert) error
|
||||
}
|
||||
|
||||
var pool = sync.Pool{New: func() interface{} {
|
||||
return &bytes.Buffer{}
|
||||
}}
|
||||
|
||||
// AlertManager represents integration provider with Prometheus alert manager
|
||||
type AlertManager struct {
|
||||
alertURL string
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// AlertURLGenerator returns URL to single alert by given name
|
||||
type AlertURLGenerator func(group, name string) string
|
||||
|
||||
// NewAlertManager is a constructor for AlertManager
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
|
||||
return &AlertManager{
|
||||
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertsPath,
|
||||
argFunc: fn,
|
||||
client: c,
|
||||
}
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(alerts []common.Alert) error {
|
||||
b := pool.Get().(*bytes.Buffer)
|
||||
b.Reset()
|
||||
defer pool.Put(b)
|
||||
writeamRequest(b, alerts, am.argFunc)
|
||||
resp, err := am.client.Post(am.alertURL, "application/json", b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
b.Reset()
|
||||
if _, err := io.Copy(b, resp.Body); err != nil {
|
||||
logger.Errorf("unable to copy error response body to buffer %s", err)
|
||||
}
|
||||
return fmt.Errorf("invalid response from alertmanager %s", b)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
187
app/vmalert/remotewrite/remotewrite.go
Normal file
187
app/vmalert/remotewrite/remotewrite.go
Normal file
@@ -0,0 +1,187 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// Client is an asynchronous HTTP client for writing
// timeseries via remote write protocol.
type Client struct {
	// addr is the full write URL: the configured address with
	// a trailing "/" trimmed and writePath appended (see NewClient).
	addr string
	// c is the HTTP client used by flush.
	c *http.Client
	// input is the queue filled by Push and consumed by run.
	input chan prompbmarshal.TimeSeries
	// baUser and baPass are basic auth credentials; flush applies
	// them only when baPass is non-empty.
	baUser, baPass string
	// flushInterval is the period of the ticker that triggers flushes in run.
	flushInterval time.Duration
	// maxBatchSize is the number of buffered timeseries at which
	// run flushes without waiting for the ticker.
	maxBatchSize int
	// maxQueueSize is reported in the "queue is full" error of Push.
	maxQueueSize int

	// wg tracks the background goroutine started by run.
	wg sync.WaitGroup
	// doneCh is closed by Close to signal shutdown.
	doneCh chan struct{}
}
|
||||
|
||||
// Config is the configuration for the remote write Client.
// NewClient replaces zero values of MaxBatchSize, MaxQueueSize,
// FlushInterval and WriteTimeout with package defaults.
type Config struct {
	// Addr of remote storage
	Addr string

	// BasicAuthUser and BasicAuthPass are optional basic auth
	// credentials; they are used only when BasicAuthPass is non-empty.
	BasicAuthUser string
	BasicAuthPass string

	// MaxBatchSize defines max number of timeseries
	// to be flushed at once
	MaxBatchSize int
	// MaxQueueSize defines max length of input queue
	// populated by Push method
	MaxQueueSize int
	// FlushInterval defines time interval for flushing batches
	FlushInterval time.Duration
	// WriteTimeout defines timeout for HTTP write request
	// to remote storage
	WriteTimeout time.Duration
}
|
||||
|
||||
// Defaults applied by NewClient when the corresponding
// Config field is left at its zero value.
const (
	defaultMaxBatchSize  = 1e3
	defaultMaxQueueSize  = 100
	defaultFlushInterval = 5 * time.Second
	defaultWriteTimeout  = 30 * time.Second
)
|
||||
|
||||
const writePath = "/api/v1/write"
|
||||
|
||||
// NewClient returns asynchronous client for
|
||||
// writing timeseries via remotewrite protocol.
|
||||
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
if cfg.Addr == "" {
|
||||
return nil, fmt.Errorf("config.Addr can't be empty")
|
||||
}
|
||||
if cfg.MaxBatchSize == 0 {
|
||||
cfg.MaxBatchSize = defaultMaxBatchSize
|
||||
}
|
||||
if cfg.MaxQueueSize == 0 {
|
||||
cfg.MaxQueueSize = defaultMaxQueueSize
|
||||
}
|
||||
if cfg.FlushInterval == 0 {
|
||||
cfg.FlushInterval = defaultFlushInterval
|
||||
}
|
||||
if cfg.WriteTimeout == 0 {
|
||||
cfg.WriteTimeout = defaultWriteTimeout
|
||||
}
|
||||
c := &Client{
|
||||
c: &http.Client{
|
||||
Timeout: cfg.WriteTimeout,
|
||||
},
|
||||
addr: strings.TrimSuffix(cfg.Addr, "/") + writePath,
|
||||
baUser: cfg.BasicAuthUser,
|
||||
baPass: cfg.BasicAuthPass,
|
||||
flushInterval: cfg.FlushInterval,
|
||||
maxBatchSize: cfg.MaxBatchSize,
|
||||
doneCh: make(chan struct{}),
|
||||
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
c.run(ctx)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Push adds timeseries into queue for writing into remote storage.
// Push returns an error if client is stopped or if queue is full.
//
// Push never blocks: when the queue has no free slot the timeseries
// is dropped and an error is returned.
//
// NOTE(review): Close closes c.input, and a send on a closed channel
// panics, so calling Push after (or concurrently with) Close looks
// unsafe - confirm callers never do that.
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
	select {
	case <-c.doneCh:
		return fmt.Errorf("client is closed")
	case c.input <- s:
		return nil
	default:
		return fmt.Errorf("failed to push timeseries - queue is full (%d entries)",
			c.maxQueueSize)
	}
}
|
||||
|
||||
// Close stops the client and waits for all goroutines
|
||||
// to exit.
|
||||
func (c *Client) Close() error {
|
||||
if c.doneCh == nil {
|
||||
return fmt.Errorf("client is already closed")
|
||||
}
|
||||
close(c.input)
|
||||
close(c.doneCh)
|
||||
c.wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) run(ctx context.Context) {
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
wr := prompbmarshal.WriteRequest{}
|
||||
shutdown := func() {
|
||||
for ts := range c.input {
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
}
|
||||
lastCtx, cancel := context.WithTimeout(context.Background(), time.Second*10)
|
||||
c.flush(lastCtx, wr)
|
||||
cancel()
|
||||
}
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
shutdown()
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
case ts := <-c.input:
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
wr = prompbmarshal.WriteRequest{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (c *Client) flush(ctx context.Context, wr prompbmarshal.WriteRequest) {
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
data, err := wr.Marshal()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||
return
|
||||
}
|
||||
req, err := http.NewRequest("POST", c.addr, bytes.NewReader(snappy.Encode(nil, data)))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create new HTTP request: %s", err)
|
||||
return
|
||||
}
|
||||
if c.baPass != "" {
|
||||
req.SetBasicAuth(c.baUser, c.baPass)
|
||||
}
|
||||
resp, err := c.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
logger.Errorf("error getting response from %s:%s", req.URL, err)
|
||||
return
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusNoContent {
|
||||
body, _ := ioutil.ReadAll(resp.Body)
|
||||
logger.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
|
||||
return
|
||||
}
|
||||
}
|
||||
269
app/vmalert/rule.go
Normal file
269
app/vmalert/rule.go
Normal file
@@ -0,0 +1,269 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
// Group is a named collection of alerting rules.
type Group struct {
	// Name of the group; copied into every alert created
	// by the group's rules (see Rule.newAlert).
	Name string
	// Rules that belong to the group.
	Rules []*Rule
}
|
||||
|
||||
// Rule is basic alert entity
type Rule struct {
	// Name is the alert name; must be non-empty (see Validate).
	Name string `yaml:"alert"`
	// Expr is the query expression evaluated by Exec.
	Expr string `yaml:"expr"`
	// For is how long an alert has to stay pending before Exec
	// switches it to the firing state.
	For time.Duration `yaml:"for"`
	// Labels are rule-level labels; they are templated and merged
	// over the metric labels when an alert is created.
	Labels map[string]string `yaml:"labels"`
	// Annotations are templated for every created alert.
	Annotations map[string]string `yaml:"annotations"`

	// group is the Group the rule belongs to.
	group *Group

	// guard status fields
	mu sync.RWMutex
	// stores list of active alerts
	alerts map[uint64]*notifier.Alert
	// stores last moment of time Exec was called
	lastExecTime time.Time
	// stores last error that happened in Exec func
	// resets on every successful Exec
	// may be used as Health state
	lastExecError error
}
|
||||
|
||||
// Validate validates rule
|
||||
func (r *Rule) Validate() error {
|
||||
if r.Name == "" {
|
||||
return errors.New("rule name can not be empty")
|
||||
}
|
||||
if r.Expr == "" {
|
||||
return fmt.Errorf("expression for rule %q can't be empty", r.Name)
|
||||
}
|
||||
if _, err := metricsql.Parse(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", r.Name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Exec executes Rule expression via the given Querier.
|
||||
// Based on the Querier results Rule maintains notifier.Alerts
|
||||
func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
|
||||
qMetrics, err := q.Query(ctx, r.Expr)
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
r.lastExecError = err
|
||||
r.lastExecTime = time.Now()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute query %q: %s", r.Expr, err)
|
||||
}
|
||||
|
||||
for h, a := range r.alerts {
|
||||
// cleanup inactive alerts from previous Eval
|
||||
if a.State == notifier.StateInactive {
|
||||
delete(r.alerts, h)
|
||||
}
|
||||
}
|
||||
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
h := hash(m)
|
||||
updated[h] = struct{}{}
|
||||
if _, ok := r.alerts[h]; ok {
|
||||
continue
|
||||
}
|
||||
a, err := r.newAlert(m)
|
||||
if err != nil {
|
||||
r.lastExecError = err
|
||||
return fmt.Errorf("failed to create alert: %s", err)
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
r.alerts[h] = a
|
||||
}
|
||||
|
||||
for h, a := range r.alerts {
|
||||
// if alert wasn't updated in this iteration
|
||||
// means it is resolved already
|
||||
if _, ok := updated[h]; !ok {
|
||||
a.State = notifier.StateInactive
|
||||
// set endTime to last execution time
|
||||
// so it can be sent by notifier on next step
|
||||
a.End = r.lastExecTime
|
||||
continue
|
||||
}
|
||||
if a.State == notifier.StatePending && time.Since(a.Start) >= r.For {
|
||||
a.State = notifier.StateFiring
|
||||
alertsFired.Inc()
|
||||
}
|
||||
if a.State == notifier.StateFiring {
|
||||
a.End = r.lastExecTime.Add(3 * *evaluationInterval)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: consider hashing algorithm in VM
|
||||
func hash(m datasource.Metric) uint64 {
|
||||
hash := fnv.New64a()
|
||||
labels := m.Labels
|
||||
sort.Slice(labels, func(i, j int) bool {
|
||||
return labels[i].Name < labels[j].Name
|
||||
})
|
||||
for _, l := range labels {
|
||||
hash.Write([]byte(l.Name))
|
||||
hash.Write([]byte(l.Value))
|
||||
hash.Write([]byte("\xff"))
|
||||
}
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
func (r *Rule) newAlert(m datasource.Metric) (*notifier.Alert, error) {
|
||||
a := ¬ifier.Alert{
|
||||
Group: r.group.Name,
|
||||
Name: r.Name,
|
||||
Labels: map[string]string{},
|
||||
Value: m.Value,
|
||||
Start: time.Now(),
|
||||
// TODO: support End time
|
||||
}
|
||||
|
||||
// 1. use data labels
|
||||
for _, l := range m.Labels {
|
||||
a.Labels[l.Name] = l.Value
|
||||
}
|
||||
|
||||
// 2. template rule labels with data labels
|
||||
rLabels, err := a.ExecTemplate(r.Labels)
|
||||
if err != nil {
|
||||
return a, err
|
||||
}
|
||||
|
||||
// 3. merge data labels and rule labels
|
||||
// metric labels may be overridden by
|
||||
// rule labels
|
||||
for k, v := range rLabels {
|
||||
a.Labels[k] = v
|
||||
}
|
||||
|
||||
// 4. template merged labels
|
||||
a.Labels, err = a.ExecTemplate(a.Labels)
|
||||
if err != nil {
|
||||
return a, err
|
||||
}
|
||||
|
||||
a.Annotations, err = a.ExecTemplate(r.Annotations)
|
||||
return a, err
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
func (r *Rule) AlertAPI(id uint64) *APIAlert {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
a, ok := r.alerts[id]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return r.newAlertAPI(*a)
|
||||
}
|
||||
|
||||
// AlertsAPI generates list of APIAlert objects from existing alerts
|
||||
func (r *Rule) AlertsAPI() []*APIAlert {
|
||||
var alerts []*APIAlert
|
||||
r.mu.RLock()
|
||||
for _, a := range r.alerts {
|
||||
alerts = append(alerts, r.newAlertAPI(*a))
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
return alerts
|
||||
}
|
||||
|
||||
func (r *Rule) newAlertAPI(a notifier.Alert) *APIAlert {
|
||||
return &APIAlert{
|
||||
ID: a.ID,
|
||||
Name: a.Name,
|
||||
Group: a.Group,
|
||||
Expression: r.Expr,
|
||||
Labels: a.Labels,
|
||||
Annotations: a.Annotations,
|
||||
State: a.State.String(),
|
||||
ActiveAt: a.Start,
|
||||
Value: strconv.FormatFloat(a.Value, 'e', -1, 64),
|
||||
}
|
||||
}
|
||||
|
||||
const (
	// alertMetricName is the metric name for synthetic alert timeseries.
	alertMetricName = "ALERTS"
	// alertForStateMetricName is the metric name for 'for' state of alert.
	alertForStateMetricName = "ALERTS_FOR_STATE"

	// alertNameLabel is the label name indicating the name of an alert.
	alertNameLabel = "alertname"
	// alertStateLabel is the label name indicating the state of an alert.
	alertStateLabel = "alertstate"
)
|
||||
|
||||
func (r *Rule) AlertToTimeSeries(a *notifier.Alert, timestamp time.Time) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, alertToTimeSeries(r.Name, a, timestamp))
|
||||
if r.For > 0 {
|
||||
tss = append(tss, alertForToTimeSeries(r.Name, a, timestamp))
|
||||
}
|
||||
return tss
|
||||
}
|
||||
|
||||
func alertToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
|
||||
labels := make(map[string]string)
|
||||
for k, v := range a.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
labels["__name__"] = alertMetricName
|
||||
labels[alertNameLabel] = name
|
||||
labels[alertStateLabel] = a.State.String()
|
||||
return newTimeSeries(1, labels, timestamp)
|
||||
}
|
||||
|
||||
func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
|
||||
labels := make(map[string]string)
|
||||
for k, v := range a.Labels {
|
||||
labels[k] = v
|
||||
}
|
||||
labels["__name__"] = alertForStateMetricName
|
||||
labels[alertNameLabel] = name
|
||||
return newTimeSeries(float64(a.Start.Unix()), labels, timestamp)
|
||||
}
|
||||
|
||||
func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
|
||||
ts := prompbmarshal.TimeSeries{}
|
||||
ts.Samples = append(ts.Samples, prompbmarshal.Sample{
|
||||
Value: value,
|
||||
Timestamp: timestamp.UnixNano() / 1e6,
|
||||
})
|
||||
keys := make([]string, 0, len(labels))
|
||||
for k := range labels {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, key := range keys {
|
||||
ts.Labels = append(ts.Labels, prompbmarshal.Label{
|
||||
Name: key,
|
||||
Value: labels[key],
|
||||
})
|
||||
}
|
||||
return ts
|
||||
}
|
||||
399
app/vmalert/rule_test.go
Normal file
399
app/vmalert/rule_test.go
Normal file
@@ -0,0 +1,399 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func TestRule_Validate(t *testing.T) {
|
||||
if err := (&Rule{}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty name error")
|
||||
}
|
||||
if err := (&Rule{Name: "alert"}).Validate(); err == nil {
|
||||
t.Errorf("exptected empty expr error")
|
||||
}
|
||||
if err := (&Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
|
||||
t.Errorf("exptected invalid expr error")
|
||||
}
|
||||
if err := (&Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Errorf("exptected valid rule got %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRule_AlertToTimeSeries(t *testing.T) {
|
||||
timestamp := time.Now()
|
||||
testCases := []struct {
|
||||
rule *Rule
|
||||
alert *notifier.Alert
|
||||
expTS []prompbmarshal.TimeSeries
|
||||
}{
|
||||
{
|
||||
newTestRule("instant", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries(1, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
alertNameLabel: "instant",
|
||||
}, timestamp),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("instant extra labels", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries(1, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
alertNameLabel: "instant extra labels",
|
||||
"job": "foo",
|
||||
"instance": "bar",
|
||||
}, timestamp),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("instant labels override", 0),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
||||
alertStateLabel: "foo",
|
||||
"__name__": "bar",
|
||||
}},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries(1, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
alertNameLabel: "instant labels override",
|
||||
}, timestamp),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for", time.Second),
|
||||
¬ifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries(1, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StateFiring.String(),
|
||||
alertNameLabel: "for",
|
||||
}, timestamp),
|
||||
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
alertNameLabel: "for",
|
||||
}, timestamp),
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for pending", 10*time.Second),
|
||||
¬ifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
|
||||
[]prompbmarshal.TimeSeries{
|
||||
newTimeSeries(1, map[string]string{
|
||||
"__name__": alertMetricName,
|
||||
alertStateLabel: notifier.StatePending.String(),
|
||||
alertNameLabel: "for pending",
|
||||
}, timestamp),
|
||||
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
||||
"__name__": alertForStateMetricName,
|
||||
alertNameLabel: "for pending",
|
||||
}, timestamp),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
tss := tc.rule.AlertToTimeSeries(tc.alert, timestamp)
|
||||
if len(tc.expTS) != len(tss) {
|
||||
t.Fatalf("expected number of timeseries %d; got %d", len(tc.expTS), len(tss))
|
||||
}
|
||||
for i := range tc.expTS {
|
||||
expTS, gotTS := tc.expTS[i], tss[i]
|
||||
if len(expTS.Samples) != len(gotTS.Samples) {
|
||||
t.Fatalf("expected number of samples %d; got %d", len(expTS.Samples), len(gotTS.Samples))
|
||||
}
|
||||
for i, exp := range expTS.Samples {
|
||||
got := gotTS.Samples[i]
|
||||
if got.Value != exp.Value {
|
||||
t.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
|
||||
}
|
||||
if got.Timestamp != exp.Timestamp {
|
||||
t.Errorf("expected timestamp %d; got %d", exp.Timestamp, got.Timestamp)
|
||||
}
|
||||
}
|
||||
if len(expTS.Labels) != len(gotTS.Labels) {
|
||||
t.Fatalf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
|
||||
}
|
||||
for i, exp := range expTS.Labels {
|
||||
got := gotTS.Labels[i]
|
||||
if got.Name != exp.Name {
|
||||
t.Errorf("expected label name %q; got %q", exp.Name, got.Name)
|
||||
}
|
||||
if got.Value != exp.Value {
|
||||
t.Errorf("expected label value %q; got %q", exp.Value, got.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func newTestRule(name string, waitFor time.Duration) *Rule {
|
||||
return &Rule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor}
|
||||
}
|
||||
|
||||
func TestRule_Exec(t *testing.T) {
|
||||
testCases := []struct {
|
||||
rule *Rule
|
||||
steps [][]datasource.Metric
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
newTestRule("empty", 0),
|
||||
[][]datasource.Metric{},
|
||||
map[uint64]*notifier.Alert{},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing=>inactive", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing=>inactive=>firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing=>inactive=>firing=>inactive", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{},
|
||||
},
|
||||
{
|
||||
newTestRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{},
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("multiple-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{
|
||||
metricWithLabels(t, "__name__", "foo"),
|
||||
metricWithLabels(t, "__name__", "foo1"),
|
||||
metricWithLabels(t, "__name__", "foo2"),
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateFiring},
|
||||
hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("multiple-steps-firing", 0),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo1")},
|
||||
{metricWithLabels(t, "__name__", "foo2")},
|
||||
},
|
||||
// 1: fire first alert
|
||||
// 2: fire second alert, set first inactive
|
||||
// 3: fire third alert, set second inactive, delete first one
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateInactive},
|
||||
hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("duplicate", 0),
|
||||
[][]datasource.Metric{
|
||||
{
|
||||
// metrics with the same labelset should result in one alert
|
||||
metricWithLabels(t, "__name__", "foo", "type", "bar"),
|
||||
metricWithLabels(t, "type", "bar", "__name__", "foo"),
|
||||
},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo", "type", "bar")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending", time.Minute),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-fired", time.Millisecond),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>inactive", time.Millisecond),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>firing=>inactive", time.Millisecond),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>firing=>inactive=>pending", time.Millisecond),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>firing=>inactive=>pending=>firing", time.Millisecond),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
{},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
{metricWithLabels(t, "__name__", "foo")},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
|
||||
},
|
||||
},
|
||||
}
|
||||
fakeGroup := &Group{Name: "TestRule_Exec"}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.group = fakeGroup
|
||||
for _, step := range tc.steps {
|
||||
fq.reset()
|
||||
fq.add(t, step...)
|
||||
if err := tc.rule.Exec(context.TODO(), fq); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
}
|
||||
// artificial delay between applying steps
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
||||
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
||||
}
|
||||
for key, exp := range tc.expAlerts {
|
||||
got, ok := tc.rule.alerts[key]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have key %d", key)
|
||||
}
|
||||
if got.State != exp.State {
|
||||
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
||||
t.Helper()
|
||||
if len(labels) == 0 || len(labels)%2 != 0 {
|
||||
t.Fatalf("expected to get even number of labels")
|
||||
}
|
||||
m := datasource.Metric{}
|
||||
for i := 0; i < len(labels); i += 2 {
|
||||
m.Labels = append(m.Labels, datasource.Label{
|
||||
Name: labels[i],
|
||||
Value: labels[i+1],
|
||||
})
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
type fakeQuerier struct {
|
||||
metrics []datasource.Metric
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) reset() {
|
||||
fq.metrics = fq.metrics[:0]
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) add(t *testing.T, metrics ...datasource.Metric) {
|
||||
fq.metrics = append(fq.metrics, metrics...)
|
||||
}
|
||||
|
||||
func (fq fakeQuerier) Query(ctx context.Context, query string) ([]datasource.Metric, error) {
|
||||
return fq.metrics, nil
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
package storage
|
||||
@@ -1,12 +1,11 @@
|
||||
groups:
|
||||
- name: groupGorSingleAlert
|
||||
- name: group
|
||||
rules:
|
||||
- alert: VMRows
|
||||
- alert: UnkownLabelFunction
|
||||
for: 5m
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
summary: "{{ value|query }}"
|
||||
annotations:
|
||||
summary: "{{ $value|humanize }}"
|
||||
description: "{{$labels}}"
|
||||
|
||||
23
app/vmalert/testdata/rules0-good.rules
vendored
Normal file
23
app/vmalert/testdata/rules0-good.rules
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
groups:
|
||||
- name: groupGorSingleAlert
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 10s
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
template: "{{ $value|humanize }}"
|
||||
annotations:
|
||||
summary: "{{ $value|humanize }}"
|
||||
description: "{{$labels}}"
|
||||
|
||||
- name: TestGroup
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
||||
annotations:
|
||||
summary: "Too high connection number for {{$labels.instance}}"
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 25 KiB |
134
app/vmalert/web.go
Normal file
134
app/vmalert/web.go
Normal file
@@ -0,0 +1,134 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
)
|
||||
|
||||
// APIAlert is the JSON representation of a single alert as served by the
// HTTP API handlers in this file.
type APIAlert struct {
	ID          uint64            `json:"id"`
	Name        string            `json:"name"`
	Group       string            `json:"group"`
	Expression  string            `json:"expression"`
	State       string            `json:"state"`
	Value       string            `json:"value"`
	Labels      map[string]string `json:"labels"`
	Annotations map[string]string `json:"annotations"`
	ActiveAt    time.Time         `json:"activeAt"`
}
|
||||
|
||||
type requestHandler struct {
|
||||
groups []Group
|
||||
}
|
||||
|
||||
// pathList holds {path, description} pairs that the "/" handler renders as
// an index of available endpoints.
var pathList = [][]string{
	{"/api/v1/alerts", "list all active alerts"},
	{"/api/v1/groupName/alertID/status", "get alert status by ID"},
	// /metrics is served by httpserver by default
	{"/metrics", "list of application metrics"},
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
resph := responseHandler{w}
|
||||
switch r.URL.Path {
|
||||
case "/":
|
||||
for _, path := range pathList {
|
||||
p, doc := path[0], path[1]
|
||||
fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
|
||||
}
|
||||
return true
|
||||
case "/api/v1/alerts":
|
||||
resph.handle(rh.list())
|
||||
return true
|
||||
default:
|
||||
// /api/v1/<groupName>/<alertID>/status
|
||||
if strings.HasSuffix(r.URL.Path, "/status") {
|
||||
resph.handle(rh.alert(r.URL.Path))
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
type listAlertsResponse struct {
|
||||
Data struct {
|
||||
Alerts []*APIAlert `json:"alerts"`
|
||||
} `json:"data"`
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) list() ([]byte, error) {
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
for _, g := range rh.groups {
|
||||
for _, r := range g.Rules {
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, r.AlertsAPI()...)
|
||||
}
|
||||
}
|
||||
|
||||
// sort list of alerts for deterministic output
|
||||
sort.Slice(lr.Data.Alerts, func(i, j int) bool {
|
||||
return lr.Data.Alerts[i].Name < lr.Data.Alerts[j].Name
|
||||
})
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
parts := strings.SplitN(strings.TrimPrefix(path, "/api/v1/"), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`path %q cointains /status suffix but doesn't match pattern "/group/alert/status"`, path),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
group := strings.TrimRight(parts[0], "/")
|
||||
idStr := strings.TrimRight(parts[1], "/")
|
||||
id, err := strconv.ParseUint(idStr, 10, 0)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`cannot parse int from %q`, idStr),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
for _, g := range rh.groups {
|
||||
if g.Name != group {
|
||||
continue
|
||||
}
|
||||
for i := range g.Rules {
|
||||
if apiAlert := g.Rules[i].AlertAPI(id); apiAlert != nil {
|
||||
return json.Marshal(apiAlert)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`cannot find alert %s in %q`, idStr, group),
|
||||
StatusCode: http.StatusNotFound,
|
||||
}
|
||||
}
|
||||
|
||||
// responseHandler wrapper on http.ResponseWriter with sugar
|
||||
type responseHandler struct{ http.ResponseWriter }
|
||||
|
||||
func (w responseHandler) handle(b []byte, err error) {
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, "%s", err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(b)
|
||||
}
|
||||
72
app/vmalert/web_test.go
Normal file
72
app/vmalert/web_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
func TestHandler(t *testing.T) {
|
||||
rule := &Rule{
|
||||
Name: "alert",
|
||||
alerts: map[uint64]*notifier.Alert{
|
||||
0: {},
|
||||
},
|
||||
}
|
||||
rh := &requestHandler{
|
||||
groups: []Group{{
|
||||
Name: "group",
|
||||
Rules: []*Rule{rule},
|
||||
}},
|
||||
}
|
||||
getResp := func(url string, to interface{}, code int) {
|
||||
t.Helper()
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected err %s", err)
|
||||
}
|
||||
if code != resp.StatusCode {
|
||||
t.Errorf("unexpected status code %d want %d", resp.StatusCode, code)
|
||||
}
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
t.Errorf("err closing body %s", err)
|
||||
}
|
||||
}()
|
||||
if to != nil {
|
||||
if err = json.NewDecoder(resp.Body).Decode(to); err != nil {
|
||||
t.Errorf("unexpected err %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rh.handler(w, r) }))
|
||||
defer ts.Close()
|
||||
t.Run("/api/v1/alerts", func(t *testing.T) {
|
||||
lr := listAlertsResponse{}
|
||||
getResp(ts.URL+"/api/v1/alerts", &lr, 200)
|
||||
if length := len(lr.Data.Alerts); length != 1 {
|
||||
t.Errorf("expected 1 alert got %d", length)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/group/0/status", func(t *testing.T) {
|
||||
alert := &APIAlert{}
|
||||
getResp(ts.URL+"/api/v1/group/0/status", alert, 200)
|
||||
expAlert := rule.newAlertAPI(*rule.alerts[0])
|
||||
if !reflect.DeepEqual(alert, expAlert) {
|
||||
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/group/1/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/group/1/status", nil, 404)
|
||||
})
|
||||
t.Run("/api/v1/unknown-group/0/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/unknown-group/0/status", nil, 404)
|
||||
})
|
||||
t.Run("/", func(t *testing.T) {
|
||||
getResp(ts.URL, nil, 200)
|
||||
})
|
||||
}
|
||||
@@ -140,14 +140,28 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
-dst string
|
||||
Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-origin string
|
||||
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
|
||||
-snapshot.createURL string
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-snapshotName string
|
||||
Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
|
||||
-storageDataPath string
|
||||
@@ -164,7 +178,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmbackup` from the root folder of the repository.
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -179,3 +193,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmbackup
|
||||
```
|
||||
|
||||
@@ -4,7 +4,9 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmbackup/snapshot"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
|
||||
@@ -14,9 +16,13 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
|
||||
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
|
||||
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
|
||||
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
|
||||
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
|
||||
snapshotCreateURL = flag.String("snapshot.createURL", "", "VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup."+
|
||||
"Example: http://victoriametrics:8428/snaphsot/create")
|
||||
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted."+
|
||||
"Example: http://victoriametrics:8428/snaphsot/delete")
|
||||
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
|
||||
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
|
||||
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
|
||||
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
|
||||
@@ -29,6 +35,34 @@ func main() {
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
|
||||
if len(*snapshotCreateURL) > 0 {
|
||||
logger.Infof("%s", "Snapshots enabled")
|
||||
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
|
||||
if len(*snapshotDeleteURL) <= 0 {
|
||||
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
|
||||
if err != nil {
|
||||
logger.Fatalf("Failed to set snapshot.deleteURL flag: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("Snapshot delete url %s", *snapshotDeleteURL)
|
||||
|
||||
name, err := snapshot.Create(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("%s", err)
|
||||
}
|
||||
err = flag.Set("snapshotName", name)
|
||||
if err != nil {
|
||||
logger.Fatalf("Failed to set snapshotName flag: %v", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := snapshot.Delete(*snapshotDeleteURL, name)
|
||||
if err != nil {
|
||||
logger.Fatalf("%s", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
srcFS, err := newSrcFS()
|
||||
if err != nil {
|
||||
logger.Fatalf("%s", err)
|
||||
@@ -67,7 +101,7 @@ See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/a
|
||||
|
||||
func newSrcFS() (*fslocal.FS, error) {
|
||||
if len(*snapshotName) == 0 {
|
||||
return nil, fmt.Errorf("`-snapshotName` cannot be empty")
|
||||
return nil, fmt.Errorf("`-snapshotName` or `-snapshot.createURL` must be provided")
|
||||
}
|
||||
snapshotPath := *storageDataPath + "/snapshots/" + *snapshotName
|
||||
|
||||
|
||||
91
app/vmbackup/snapshot/snapshot.go
Normal file
91
app/vmbackup/snapshot/snapshot.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// snapshot is the JSON document decoded from the responses of the snapshot
// create and delete endpoints.
type snapshot struct {
	Status   string `json:"status"`
	Snapshot string `json:"snapshot"`
	Msg      string `json:"msg"`
}
|
||||
|
||||
// Create creates a snapshot and the provided api endpoint and returns
|
||||
// the snapshot name
|
||||
func Create(createSnapshotURL string) (string, error) {
|
||||
logger.Infof("%s", "Creating snapshot")
|
||||
u, err := url.Parse(createSnapshotURL)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
resp, err := http.Get(u.String())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
snap := snapshot{}
|
||||
err = json.Unmarshal(body, &snap)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if snap.Status == "ok" {
|
||||
logger.Infof("Snapshot %s created", snap.Snapshot)
|
||||
return snap.Snapshot, nil
|
||||
} else if snap.Status == "error" {
|
||||
return "", errors.New(snap.Msg)
|
||||
} else {
|
||||
return "", fmt.Errorf("Unkown status: %v", snap.Status)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete deletes a snapshot and the provided api endpoint returns any failure
|
||||
func Delete(deleteSnapshotURL string, snapshotName string) error {
|
||||
logger.Infof("Deleting snapshot %s", snapshotName)
|
||||
formData := url.Values{
|
||||
"snapshot": {snapshotName},
|
||||
}
|
||||
|
||||
u, err := url.Parse(deleteSnapshotURL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := http.PostForm(u.String(), formData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
snap := snapshot{}
|
||||
err = json.Unmarshal(body, &snap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if snap.Status == "ok" {
|
||||
logger.Infof("Snapshot %s deleted", snapshotName)
|
||||
return nil
|
||||
} else if snap.Status == "error" {
|
||||
return errors.New(snap.Msg)
|
||||
} else {
|
||||
return fmt.Errorf("Unkown status: %v", snap.Status)
|
||||
}
|
||||
}
|
||||
106
app/vmbackup/snapshot/snapshot_test.go
Normal file
106
app/vmbackup/snapshot/snapshot_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCreateSnapshot(t *testing.T) {
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/snapshot/create" {
|
||||
_, err := io.WriteString(w, `{"status":"ok","snapshot":"mysnapshot"}`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write response output: %v", err)
|
||||
}
|
||||
} else {
|
||||
t.Fatalf("Invalid path, got %v", r.URL.Path)
|
||||
}
|
||||
})
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(handler))
|
||||
defer server.Close()
|
||||
|
||||
snapshotName, err := Create(server.URL + "/snapshot/create")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed taking snapshot: %v", err)
|
||||
}
|
||||
|
||||
if snapshotName != "mysnapshot" {
|
||||
t.Fatalf("Snapshot name is not correct, got %v", snapshotName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSnapshotFailed(t *testing.T) {
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/snapshot/create" {
|
||||
_, err := io.WriteString(w, `{"status":"error","msg":"I am unwell"}`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write response output: %v", err)
|
||||
}
|
||||
} else {
|
||||
t.Fatalf("Invalid path, got %v", r.URL.Path)
|
||||
}
|
||||
})
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(handler))
|
||||
defer server.Close()
|
||||
|
||||
snapshotName, err := Create(server.URL + "/snapshot/create")
|
||||
if err == nil {
|
||||
t.Fatalf("Snapshot did not fail, got snapshot: %v", snapshotName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteSnapshot(t *testing.T) {
|
||||
snapshotName := "mysnapshot"
|
||||
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/snapshot/delete" {
|
||||
_, err := io.WriteString(w, `{"status":"ok"}`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write response output: %v", err)
|
||||
}
|
||||
} else {
|
||||
t.Fatalf("Invalid path, got %v", r.URL.Path)
|
||||
}
|
||||
if r.FormValue("snapshot") != snapshotName {
|
||||
t.Fatalf("Invalid snapshot name, got %v", snapshotName)
|
||||
}
|
||||
})
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(handler))
|
||||
defer server.Close()
|
||||
|
||||
err := Delete(server.URL+"/snapshot/delete", snapshotName)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to delete snapshot: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteSnapshotFailed(t *testing.T) {
|
||||
snapshotName := "mysnapshot"
|
||||
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/snapshot/delete" {
|
||||
_, err := io.WriteString(w, `{"status":"error", "msg":"failed to delete"}`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to write response output: %v", err)
|
||||
}
|
||||
} else {
|
||||
t.Fatalf("Invalid path, got %v", r.URL.Path)
|
||||
}
|
||||
if r.FormValue("snapshot") != snapshotName {
|
||||
t.Fatalf("Invalid snapshot name, got %v", snapshotName)
|
||||
}
|
||||
})
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(handler))
|
||||
defer server.Close()
|
||||
|
||||
err := Delete(server.URL+"/snapshot/delete", snapshotName)
|
||||
if err == nil {
|
||||
t.Fatalf("Snapshot should have failed, got: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -47,12 +47,24 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum download speed. There is no limit if it is set to 0
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-skipBackupCompleteCheck
|
||||
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-storageDataPath string
|
||||
@@ -69,7 +81,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmrestore` from the root folder of the repository.
|
||||
It builds `vmrestore` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -84,3 +96,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmrestore
|
||||
```
|
||||
|
||||
@@ -8,11 +8,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
@@ -24,7 +22,7 @@ var (
|
||||
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
|
||||
"It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration")
|
||||
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
|
||||
resetCacheAuthKey = flag.String("search.resetCacheAuthKey", "", "Optional authKey for resetting rollup cache via /internal/resetCache call")
|
||||
resetCacheAuthKey = flag.String("search.resetCacheAuthKey", "", "Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call")
|
||||
)
|
||||
|
||||
func getDefaultMaxConcurrentRequests() int {
|
||||
@@ -43,9 +41,6 @@ func getDefaultMaxConcurrentRequests() int {
|
||||
|
||||
// Init initializes vmselect
|
||||
func Init() {
|
||||
tmpDirPath := *vmstorage.DataPath + "/tmp"
|
||||
fs.RemoveDirContents(tmpDirPath)
|
||||
netstorage.InitTmpBlocksDir(tmpDirPath)
|
||||
promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
|
||||
|
||||
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
|
||||
@@ -179,6 +174,14 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/status/tsdb":
|
||||
tsdbStatusRequests.Inc()
|
||||
if err := prometheus.TSDBStatusHandler(startTime, w, r); err != nil {
|
||||
tsdbStatusErrors.Inc()
|
||||
sendPrometheusError(w, r, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/export":
|
||||
exportRequests.Inc()
|
||||
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
|
||||
@@ -191,7 +194,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
federateRequests.Inc()
|
||||
if err := prometheus.FederateHandler(startTime, w, r); err != nil {
|
||||
federateErrors.Inc()
|
||||
httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
|
||||
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
@@ -233,7 +236,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
logger.Errorf("error in %q: %s", r.RequestURI, err)
|
||||
logger.Warnf("error in %q: %s", r.RequestURI, err)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
statusCode := http.StatusUnprocessableEntity
|
||||
@@ -266,6 +269,9 @@ var (
|
||||
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
|
||||
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)
|
||||
|
||||
tsdbStatusRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/tsdb"}`)
|
||||
tsdbStatusErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/tsdb"}`)
|
||||
|
||||
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
|
||||
|
||||
@@ -53,9 +53,8 @@ type Results struct {
|
||||
fetchData bool
|
||||
deadline Deadline
|
||||
|
||||
tbf *tmpBlocksFile
|
||||
|
||||
packedTimeseries []packedTimeseries
|
||||
sr *storage.Search
|
||||
}
|
||||
|
||||
// Len returns the number of results in rss.
|
||||
@@ -65,8 +64,12 @@ func (rss *Results) Len() int {
|
||||
|
||||
// Cancel cancels rss work.
|
||||
func (rss *Results) Cancel() {
|
||||
putTmpBlocksFile(rss.tbf)
|
||||
rss.tbf = nil
|
||||
rss.mustClose()
|
||||
}
|
||||
|
||||
func (rss *Results) mustClose() {
|
||||
putStorageSearch(rss.sr)
|
||||
rss.sr = nil
|
||||
}
|
||||
|
||||
// RunParallel runs in parallel f for all the results from rss.
|
||||
@@ -76,10 +79,7 @@ func (rss *Results) Cancel() {
|
||||
//
|
||||
// rss becomes unusable after the call to RunParallel.
|
||||
func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
|
||||
defer func() {
|
||||
putTmpBlocksFile(rss.tbf)
|
||||
rss.tbf = nil
|
||||
}()
|
||||
defer rss.mustClose()
|
||||
|
||||
workersCount := 1 + len(rss.packedTimeseries)/32
|
||||
if workersCount > gomaxprocs {
|
||||
@@ -106,7 +106,7 @@ func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
|
||||
err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
|
||||
break
|
||||
}
|
||||
if err = pts.Unpack(rss.tbf, rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
|
||||
if err = pts.Unpack(rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
|
||||
break
|
||||
}
|
||||
if len(rs.Timestamps) == 0 && rss.fetchData {
|
||||
@@ -156,18 +156,18 @@ var gomaxprocs = runtime.GOMAXPROCS(-1)
|
||||
|
||||
type packedTimeseries struct {
|
||||
metricName string
|
||||
addrs []tmpBlockAddr
|
||||
brs []storage.BlockRef
|
||||
}
|
||||
|
||||
// Unpack unpacks pts to dst.
|
||||
func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.TimeRange, fetchData bool, maxWorkersCount int) error {
|
||||
func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData bool, maxWorkersCount int) error {
|
||||
dst.reset()
|
||||
|
||||
if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal metricName %q: %s", pts.metricName, err)
|
||||
}
|
||||
|
||||
workersCount := 1 + len(pts.addrs)/32
|
||||
workersCount := 1 + len(pts.brs)/32
|
||||
if workersCount > maxWorkersCount {
|
||||
workersCount = maxWorkersCount
|
||||
}
|
||||
@@ -175,19 +175,19 @@ func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.
|
||||
logger.Panicf("BUG: workersCount cannot be zero")
|
||||
}
|
||||
|
||||
sbs := make([]*sortBlock, 0, len(pts.addrs))
|
||||
sbs := make([]*sortBlock, 0, len(pts.brs))
|
||||
var sbsLock sync.Mutex
|
||||
|
||||
workCh := make(chan tmpBlockAddr, workersCount)
|
||||
workCh := make(chan storage.BlockRef, workersCount)
|
||||
doneCh := make(chan error)
|
||||
|
||||
// Start workers
|
||||
for i := 0; i < workersCount; i++ {
|
||||
go func() {
|
||||
var err error
|
||||
for addr := range workCh {
|
||||
for br := range workCh {
|
||||
sb := getSortBlock()
|
||||
if err = sb.unpackFrom(tbf, addr, tr, fetchData); err != nil {
|
||||
if err = sb.unpackFrom(br, tr, fetchData); err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
@@ -204,10 +204,10 @@ func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.
|
||||
}
|
||||
|
||||
// Feed workers with work
|
||||
for _, addr := range pts.addrs {
|
||||
workCh <- addr
|
||||
for _, br := range pts.brs {
|
||||
workCh <- br
|
||||
}
|
||||
pts.addrs = pts.addrs[:0]
|
||||
pts.brs = pts.brs[:0]
|
||||
close(workCh)
|
||||
|
||||
// Wait until workers finish
|
||||
@@ -314,8 +314,8 @@ func (sb *sortBlock) reset() {
|
||||
sb.NextIdx = 0
|
||||
}
|
||||
|
||||
func (sb *sortBlock) unpackFrom(tbf *tmpBlocksFile, addr tmpBlockAddr, tr storage.TimeRange, fetchData bool) error {
|
||||
tbf.MustReadBlockAt(&sb.b, addr)
|
||||
func (sb *sortBlock) unpackFrom(br storage.BlockRef, tr storage.TimeRange, fetchData bool) error {
|
||||
br.MustReadBlock(&sb.b, fetchData)
|
||||
if fetchData {
|
||||
if err := sb.b.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal block: %s", err)
|
||||
@@ -449,6 +449,15 @@ func GetLabelEntries(deadline Deadline) ([]storage.TagEntry, error) {
|
||||
return labelEntries, nil
|
||||
}
|
||||
|
||||
// GetTSDBStatusForDate returns tsdb status according to https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
|
||||
func GetTSDBStatusForDate(deadline Deadline, date uint64, topN int) (*storage.TSDBStatus, error) {
|
||||
status, err := vmstorage.GetTSDBStatusForDate(date, topN)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error during tsdb status request: %s", err)
|
||||
}
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// GetSeriesCount returns the number of unique series.
|
||||
func GetSeriesCount(deadline Deadline) (uint64, error) {
|
||||
n, err := vmstorage.GetSeriesCount()
|
||||
@@ -474,6 +483,8 @@ func putStorageSearch(sr *storage.Search) {
|
||||
var ssPool sync.Pool
|
||||
|
||||
// ProcessSearchQuery performs sq on storage nodes until the given deadline.
|
||||
//
|
||||
// Results.RunParallel or Results.Cancel must be called on the returned Results.
|
||||
func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadline) (*Results, error) {
|
||||
// Setup search.
|
||||
tfss, err := setupTfss(sq.TagFilterss)
|
||||
@@ -489,60 +500,40 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
|
||||
defer vmstorage.WG.Done()
|
||||
|
||||
sr := getStorageSearch()
|
||||
defer putStorageSearch(sr)
|
||||
sr.Init(vmstorage.Storage, tfss, tr, fetchData, *maxMetricsPerSearch)
|
||||
sr.Init(vmstorage.Storage, tfss, tr, *maxMetricsPerSearch)
|
||||
|
||||
tbf := getTmpBlocksFile()
|
||||
m := make(map[string][]tmpBlockAddr)
|
||||
m := make(map[string][]storage.BlockRef)
|
||||
var orderedMetricNames []string
|
||||
blocksRead := 0
|
||||
bb := tmpBufPool.Get()
|
||||
defer tmpBufPool.Put(bb)
|
||||
for sr.NextMetricBlock() {
|
||||
blocksRead++
|
||||
bb.B = storage.MarshalBlock(bb.B[:0], sr.MetricBlock.Block)
|
||||
addr, err := tbf.WriteBlockData(bb.B)
|
||||
if err != nil {
|
||||
putTmpBlocksFile(tbf)
|
||||
return nil, fmt.Errorf("cannot write data block #%d to temporary blocks file: %s", blocksRead, err)
|
||||
}
|
||||
if time.Until(deadline.Deadline) < 0 {
|
||||
putTmpBlocksFile(tbf)
|
||||
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
|
||||
}
|
||||
metricName := sr.MetricBlock.MetricName
|
||||
m[string(metricName)] = append(m[string(metricName)], addr)
|
||||
metricName := sr.MetricBlockRef.MetricName
|
||||
brs := m[string(metricName)]
|
||||
if len(brs) == 0 {
|
||||
orderedMetricNames = append(orderedMetricNames, string(metricName))
|
||||
}
|
||||
m[string(metricName)] = append(brs, *sr.MetricBlockRef.BlockRef)
|
||||
}
|
||||
if err := sr.Error(); err != nil {
|
||||
putTmpBlocksFile(tbf)
|
||||
return nil, fmt.Errorf("search error after reading %d data blocks: %s", blocksRead, err)
|
||||
}
|
||||
if err := tbf.Finalize(); err != nil {
|
||||
putTmpBlocksFile(tbf)
|
||||
return nil, fmt.Errorf("cannot finalize temporary blocks file with %d blocks: %s", blocksRead, err)
|
||||
}
|
||||
|
||||
var rss Results
|
||||
rss.packedTimeseries = make([]packedTimeseries, len(m))
|
||||
rss.tr = tr
|
||||
rss.fetchData = fetchData
|
||||
rss.deadline = deadline
|
||||
rss.tbf = tbf
|
||||
i := 0
|
||||
for metricName, addrs := range m {
|
||||
pts := &rss.packedTimeseries[i]
|
||||
i++
|
||||
pts.metricName = metricName
|
||||
pts.addrs = addrs
|
||||
pts := make([]packedTimeseries, len(orderedMetricNames))
|
||||
for i, metricName := range orderedMetricNames {
|
||||
pts[i] = packedTimeseries{
|
||||
metricName: metricName,
|
||||
brs: m[metricName],
|
||||
}
|
||||
}
|
||||
|
||||
// Sort rss.packedTimeseries by the first addr offset in order
|
||||
// to reduce the number of disk seeks during unpacking in RunParallel.
|
||||
// In this case tmpBlocksFile must be read almost sequentially.
|
||||
sort.Slice(rss.packedTimeseries, func(i, j int) bool {
|
||||
pts := rss.packedTimeseries
|
||||
return pts[i].addrs[0].offset < pts[j].addrs[0].offset
|
||||
})
|
||||
|
||||
rss.packedTimeseries = pts
|
||||
rss.sr = sr
|
||||
return &rss, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,185 +0,0 @@
|
||||
package netstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// InitTmpBlocksDir initializes directory to store temporary search results.
|
||||
//
|
||||
// It stores data in system-defined temporary directory if tmpDirPath is empty.
|
||||
func InitTmpBlocksDir(tmpDirPath string) {
|
||||
if len(tmpDirPath) == 0 {
|
||||
tmpDirPath = os.TempDir()
|
||||
}
|
||||
tmpBlocksDir = tmpDirPath + "/searchResults"
|
||||
fs.MustRemoveAll(tmpBlocksDir)
|
||||
if err := fs.MkdirAllIfNotExist(tmpBlocksDir); err != nil {
|
||||
logger.Panicf("FATAL: cannot create %q: %s", tmpBlocksDir, err)
|
||||
}
|
||||
}
|
||||
|
||||
var tmpBlocksDir string
|
||||
|
||||
func maxInmemoryTmpBlocksFile() int {
|
||||
mem := memory.Allowed()
|
||||
maxLen := mem / 1024
|
||||
if maxLen < 64*1024 {
|
||||
return 64 * 1024
|
||||
}
|
||||
if maxLen > 4*1024*1024 {
|
||||
return 4 * 1024 * 1024
|
||||
}
|
||||
return maxLen
|
||||
}
|
||||
|
||||
var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
|
||||
return float64(maxInmemoryTmpBlocksFile())
|
||||
})
|
||||
|
||||
type tmpBlocksFile struct {
|
||||
buf []byte
|
||||
|
||||
f *os.File
|
||||
r *fs.ReaderAt
|
||||
|
||||
offset uint64
|
||||
}
|
||||
|
||||
func getTmpBlocksFile() *tmpBlocksFile {
|
||||
v := tmpBlocksFilePool.Get()
|
||||
if v == nil {
|
||||
return &tmpBlocksFile{
|
||||
buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
|
||||
}
|
||||
}
|
||||
return v.(*tmpBlocksFile)
|
||||
}
|
||||
|
||||
func putTmpBlocksFile(tbf *tmpBlocksFile) {
|
||||
tbf.MustClose()
|
||||
tbf.buf = tbf.buf[:0]
|
||||
tbf.f = nil
|
||||
tbf.r = nil
|
||||
tbf.offset = 0
|
||||
tmpBlocksFilePool.Put(tbf)
|
||||
}
|
||||
|
||||
var tmpBlocksFilePool sync.Pool
|
||||
|
||||
// tmpBlockAddr is the location of a block written via WriteBlockData.
type tmpBlockAddr struct {
	// offset is the position of the first byte of the block.
	offset uint64
	// size is the length of the block in bytes.
	size int
}

// String returns a human-readable representation of addr.
func (addr tmpBlockAddr) String() string {
	const format = "offset %d, size %d"
	return fmt.Sprintf(format, addr.offset, addr.size)
}
|
||||
|
||||
var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)
|
||||
|
||||
// WriteBlockData writes b to tbf.
|
||||
//
|
||||
// It returns errors since the operation may fail on space shortage
|
||||
// and this must be handled.
|
||||
func (tbf *tmpBlocksFile) WriteBlockData(b []byte) (tmpBlockAddr, error) {
|
||||
var addr tmpBlockAddr
|
||||
addr.offset = tbf.offset
|
||||
addr.size = len(b)
|
||||
tbf.offset += uint64(addr.size)
|
||||
if len(tbf.buf)+len(b) <= cap(tbf.buf) {
|
||||
// Fast path - the data fits tbf.buf
|
||||
tbf.buf = append(tbf.buf, b...)
|
||||
return addr, nil
|
||||
}
|
||||
|
||||
// Slow path: flush the data from tbf.buf to file.
|
||||
if tbf.f == nil {
|
||||
f, err := ioutil.TempFile(tmpBlocksDir, "")
|
||||
if err != nil {
|
||||
return addr, err
|
||||
}
|
||||
tbf.f = f
|
||||
tmpBlocksFilesCreated.Inc()
|
||||
}
|
||||
_, err := tbf.f.Write(tbf.buf)
|
||||
tbf.buf = append(tbf.buf[:0], b...)
|
||||
if err != nil {
|
||||
return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
|
||||
}
|
||||
return addr, nil
|
||||
}
|
||||
|
||||
func (tbf *tmpBlocksFile) Finalize() error {
|
||||
if tbf.f == nil {
|
||||
return nil
|
||||
}
|
||||
fname := tbf.f.Name()
|
||||
if _, err := tbf.f.Write(tbf.buf); err != nil {
|
||||
return fmt.Errorf("cannot write the remaining %d bytes to %q: %s", len(tbf.buf), fname, err)
|
||||
}
|
||||
tbf.buf = tbf.buf[:0]
|
||||
r, err := fs.OpenReaderAt(fname)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot open %q: %s", fname, err)
|
||||
}
|
||||
// Hint the OS that the file is read almost sequentiallly.
|
||||
// This should reduce the number of disk seeks, which is important
|
||||
// for HDDs.
|
||||
r.MustFadviseSequentialRead(true)
|
||||
tbf.r = r
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
|
||||
var buf []byte
|
||||
if tbf.f == nil {
|
||||
buf = tbf.buf[addr.offset : addr.offset+uint64(addr.size)]
|
||||
} else {
|
||||
bb := tmpBufPool.Get()
|
||||
defer tmpBufPool.Put(bb)
|
||||
bb.B = bytesutil.Resize(bb.B, addr.size)
|
||||
tbf.r.MustReadAt(bb.B, int64(addr.offset))
|
||||
buf = bb.B
|
||||
}
|
||||
tail, err := storage.UnmarshalBlock(dst, buf)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal data at %s: %s", addr, err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling data at %s; len(tail)=%d", addr, len(tail))
|
||||
}
|
||||
}
|
||||
|
||||
var tmpBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func (tbf *tmpBlocksFile) MustClose() {
|
||||
if tbf.f == nil {
|
||||
return
|
||||
}
|
||||
if tbf.r != nil {
|
||||
// tbf.r could be nil if Finalize wasn't called.
|
||||
tbf.r.MustClose()
|
||||
}
|
||||
fname := tbf.f.Name()
|
||||
|
||||
// Remove the file at first, then close it.
|
||||
// This way the OS shouldn't try to flush file contents to storage
|
||||
// on close.
|
||||
if err := os.Remove(fname); err != nil {
|
||||
logger.Panicf("FATAL: cannot remove %q: %s", fname, err)
|
||||
}
|
||||
if err := tbf.f.Close(); err != nil {
|
||||
logger.Panicf("FATAL: cannot close %q: %s", fname, err)
|
||||
}
|
||||
tbf.f = nil
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
package netstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
tmpDir := "TestTmpBlocks"
|
||||
InitTmpBlocksDir(tmpDir)
|
||||
statusCode := m.Run()
|
||||
if err := os.RemoveAll(tmpDir); err != nil {
|
||||
logger.Panicf("cannot remove %q: %s", tmpDir, err)
|
||||
}
|
||||
os.Exit(statusCode)
|
||||
}
|
||||
|
||||
func TestTmpBlocksFileSerial(t *testing.T) {
|
||||
if err := testTmpBlocksFile(); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTmpBlocksFileConcurrent(t *testing.T) {
|
||||
concurrency := 3
|
||||
ch := make(chan error, concurrency)
|
||||
for i := 0; i < concurrency; i++ {
|
||||
go func() {
|
||||
ch <- testTmpBlocksFile()
|
||||
}()
|
||||
}
|
||||
for i := 0; i < concurrency; i++ {
|
||||
select {
|
||||
case err := <-ch:
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
case <-time.After(30 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testTmpBlocksFile() error {
|
||||
createBlock := func() *storage.Block {
|
||||
rowsCount := rand.Intn(8000) + 1
|
||||
var timestamps, values []int64
|
||||
ts := int64(rand.Intn(1023434))
|
||||
for i := 0; i < rowsCount; i++ {
|
||||
ts += int64(rand.Intn(1000) + 1)
|
||||
timestamps = append(timestamps, ts)
|
||||
values = append(values, int64(i*i+rand.Intn(20)))
|
||||
}
|
||||
tsid := &storage.TSID{
|
||||
MetricID: 234211,
|
||||
}
|
||||
scale := int16(rand.Intn(123))
|
||||
precisionBits := uint8(rand.Intn(63) + 1)
|
||||
var b storage.Block
|
||||
b.Init(tsid, timestamps, values, scale, precisionBits)
|
||||
_, _, _ = b.MarshalData(0, 0)
|
||||
return &b
|
||||
}
|
||||
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
|
||||
err := func() error {
|
||||
tbf := getTmpBlocksFile()
|
||||
defer putTmpBlocksFile(tbf)
|
||||
|
||||
// Write blocks until their summary size exceeds `size`.
|
||||
var addrs []tmpBlockAddr
|
||||
var blocks []*storage.Block
|
||||
bb := tmpBufPool.Get()
|
||||
defer tmpBufPool.Put(bb)
|
||||
for tbf.offset < uint64(size) {
|
||||
b := createBlock()
|
||||
bb.B = storage.MarshalBlock(bb.B[:0], b)
|
||||
addr, err := tbf.WriteBlockData(bb.B)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot write block at offset %d: %s", tbf.offset, err)
|
||||
}
|
||||
if addr.offset+uint64(addr.size) != tbf.offset {
|
||||
return fmt.Errorf("unexpected addr=%+v for offset %v", &addr, tbf.offset)
|
||||
}
|
||||
addrs = append(addrs, addr)
|
||||
blocks = append(blocks, b)
|
||||
}
|
||||
if err := tbf.Finalize(); err != nil {
|
||||
return fmt.Errorf("cannot finalize tbf: %s", err)
|
||||
}
|
||||
|
||||
// Read blocks in parallel and verify them
|
||||
concurrency := 2
|
||||
workCh := make(chan int)
|
||||
doneCh := make(chan error)
|
||||
for i := 0; i < concurrency; i++ {
|
||||
go func() {
|
||||
doneCh <- func() error {
|
||||
var b1 storage.Block
|
||||
for idx := range workCh {
|
||||
addr := addrs[idx]
|
||||
b := blocks[idx]
|
||||
if err := b.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal data from the original block: %s", err)
|
||||
}
|
||||
b1.Reset()
|
||||
tbf.MustReadBlockAt(&b1, addr)
|
||||
if err := b1.UnmarshalData(); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal data from tbf: %s", err)
|
||||
}
|
||||
if b1.RowsCount() != b.RowsCount() {
|
||||
return fmt.Errorf("unexpected number of rows in tbf block; got %d; want %d", b1.RowsCount(), b.RowsCount())
|
||||
}
|
||||
if !reflect.DeepEqual(b1.Timestamps(), b.Timestamps()) {
|
||||
return fmt.Errorf("unexpected timestamps; got\n%v\nwant\n%v", b1.Timestamps(), b.Timestamps())
|
||||
}
|
||||
if !reflect.DeepEqual(b1.Values(), b.Values()) {
|
||||
return fmt.Errorf("unexpected values; got\n%v\nwant\n%v", b1.Values(), b.Values())
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
}()
|
||||
}
|
||||
for i := range addrs {
|
||||
workCh <- i
|
||||
}
|
||||
close(workCh)
|
||||
for i := 0; i < concurrency; i++ {
|
||||
select {
|
||||
case err := <-doneCh:
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
return fmt.Errorf("timeout")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -30,7 +30,12 @@ var (
|
||||
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for search query execution")
|
||||
maxQueryLen = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
|
||||
maxLookback = flag.Duration("search.maxLookback", 0, "Synonim to -search.lookback-delta from Prometheus. "+
|
||||
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg")
|
||||
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. "+
|
||||
"See also '-search.maxStalenessInterval' flag, which has the same meaining due to historical reasons")
|
||||
maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+
|
||||
"By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning "+
|
||||
"Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. "+
|
||||
"See also '-search.maxLookback' flag, which has the same meanining due to historical reasons")
|
||||
)
|
||||
|
||||
// Default step used if not set.
|
||||
@@ -372,7 +377,6 @@ func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain label entries: %s`, err)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteLabelsCountResponse(w, labelEntries)
|
||||
labelsCountDuration.UpdateDuration(startTime)
|
||||
@@ -381,6 +385,52 @@ func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
|
||||
|
||||
var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels/count"}`)
|
||||
|
||||
const secsPerDay = 3600 * 24
|
||||
|
||||
// TSDBStatusHandler processes /api/v1/status/tsdb request.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
|
||||
func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := getDeadlineForQuery(r)
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
}
|
||||
date := time.Now().Unix() / secsPerDay
|
||||
dateStr := r.FormValue("date")
|
||||
if len(dateStr) > 0 {
|
||||
t, err := time.Parse("2006-01-02", dateStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `date` arg %q: %s", dateStr, err)
|
||||
}
|
||||
date = t.Unix() / secsPerDay
|
||||
}
|
||||
topN := 10
|
||||
topNStr := r.FormValue("topN")
|
||||
if len(topNStr) > 0 {
|
||||
n, err := strconv.Atoi(topNStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `topN` arg %q: %s", topNStr, err)
|
||||
}
|
||||
if n <= 0 {
|
||||
n = 1
|
||||
}
|
||||
if n > 1000 {
|
||||
n = 1000
|
||||
}
|
||||
topN = n
|
||||
}
|
||||
status, err := netstorage.GetTSDBStatusForDate(deadline, uint64(date), topN)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %s`, date, topN, err)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteTSDBStatusResponse(w, status)
|
||||
tsdbStatusDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
}
|
||||
|
||||
var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/tsdb"}`)
|
||||
|
||||
// LabelsHandler processes /api/v1/labels request.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
|
||||
@@ -576,15 +626,18 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
step, err := getDuration(r, "step", defaultStep)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deadline := getDeadlineForQuery(r)
|
||||
lookbackDelta, err := getMaxLookback(r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
step, err := getDuration(r, "step", lookbackDelta)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if step <= 0 {
|
||||
step = defaultStep
|
||||
}
|
||||
deadline := getDeadlineForQuery(r)
|
||||
|
||||
if len(query) > *maxQueryLen {
|
||||
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
|
||||
@@ -900,6 +953,9 @@ const maxDurationMsecs = 100 * 365 * 24 * 3600 * 1000
|
||||
|
||||
func getMaxLookback(r *http.Request) (int64, error) {
|
||||
d := maxLookback.Milliseconds()
|
||||
if d == 0 {
|
||||
d = maxStalenessInterval.Milliseconds()
|
||||
}
|
||||
return getDuration(r, "max_lookback", d)
|
||||
}
|
||||
|
||||
|
||||
28
app/vmselect/prometheus/tsdb_status_response.qtpl
Normal file
28
app/vmselect/prometheus/tsdb_status_response.qtpl
Normal file
@@ -0,0 +1,28 @@
|
||||
{% import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" %}
|
||||
|
||||
{% stripspace %}
|
||||
TSDBStatusResponse generates response for /api/v1/status/tsdb .
|
||||
{% func TSDBStatusResponse(status *storage.TSDBStatus) %}
|
||||
{
|
||||
"status":"success",
|
||||
"data":{
|
||||
"seriesCountByMetricName":{%= tsdbStatusEntries(status.SeriesCountByMetricName) %},
|
||||
"labelValueCountByLabelName":{%= tsdbStatusEntries(status.LabelValueCountByLabelName) %},
|
||||
"seriesCountByLabelValuePair":{%= tsdbStatusEntries(status.SeriesCountByLabelValuePair) %}
|
||||
}
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% func tsdbStatusEntries(a []storage.TopHeapEntry) %}
|
||||
[
|
||||
{% for i, e := range a %}
|
||||
{
|
||||
"name":{%q= e.Name %},
|
||||
"value":{%d= int(e.Count) %}
|
||||
}
|
||||
{% if i+1 < len(a) %},{% endif %}
|
||||
{% endfor %}
|
||||
]
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
||||
123
app/vmselect/prometheus/tsdb_status_response.qtpl.go
Normal file
123
app/vmselect/prometheus/tsdb_status_response.qtpl.go
Normal file
@@ -0,0 +1,123 @@
|
||||
// Code generated by qtc from "tsdb_status_response.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:1
|
||||
package prometheus
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:1
|
||||
import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
|
||||
// TSDBStatusResponse generates response for /api/v1/status/tsdb .
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
|
||||
func StreamTSDBStatusResponse(qw422016 *qt422016.Writer, status *storage.TSDBStatus) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
|
||||
qw422016.N().S(`{"status":"success","data":{"seriesCountByMetricName":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
streamtsdbStatusEntries(qw422016, status.SeriesCountByMetricName)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
qw422016.N().S(`,"labelValueCountByLabelName":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:10
|
||||
streamtsdbStatusEntries(qw422016, status.LabelValueCountByLabelName)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:10
|
||||
qw422016.N().S(`,"seriesCountByLabelValuePair":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:11
|
||||
streamtsdbStatusEntries(qw422016, status.SeriesCountByLabelValuePair)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:11
|
||||
qw422016.N().S(`}}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
func WriteTSDBStatusResponse(qq422016 qtio422016.Writer, status *storage.TSDBStatus) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
StreamTSDBStatusResponse(qw422016, status)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
func TSDBStatusResponse(status *storage.TSDBStatus) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
WriteTSDBStatusResponse(qb422016, status)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
func streamtsdbStatusEntries(qw422016 *qt422016.Writer, a []storage.TopHeapEntry) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
for i, e := range a {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
qw422016.N().S(`{"name":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:20
|
||||
qw422016.N().Q(e.Name)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:20
|
||||
qw422016.N().S(`,"value":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
|
||||
qw422016.N().D(int(e.Count))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
if i+1 < len(a) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
func writetsdbStatusEntries(qq422016 qtio422016.Writer, a []storage.TopHeapEntry) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
streamtsdbStatusEntries(qw422016, a)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
func tsdbStatusEntries(a []storage.TopHeapEntry) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
writetsdbStatusEntries(qb422016, a)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
}
|
||||
@@ -338,7 +338,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `timestamp(123)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
@@ -349,7 +349,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `timestamp(time())`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
@@ -360,7 +360,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `timestamp(456/time()+123)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
@@ -371,7 +371,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `timestamp(time()>=1600)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{nan, nan, nan, 1600, 1800, 2000},
|
||||
Values: []float64{nan, nan, nan, nan, 1700, 1900},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
||||
@@ -15,9 +15,9 @@ import (
|
||||
"github.com/valyala/histogram"
|
||||
)
|
||||
|
||||
var maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+
|
||||
"By default it is automatically calculated from the median interval between samples. This flag can be useful for tuning "+
|
||||
"Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details")
|
||||
// minStalenessInterval holds the value of the -search.minStalenessInterval flag.
// The zero default leaves the flag disabled.
var minStalenessInterval = flag.Duration("search.minStalenessInterval", 0, "The minimum interval for staleness calculations. "+
	"This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. "+
	"See also '-search.maxStalenessInterval'")
|
||||
|
||||
var rollupFuncs = map[string]newRollupFunc{
|
||||
// Standard rollup funcs from PromQL.
|
||||
@@ -72,6 +72,11 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
|
||||
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
|
||||
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
|
||||
|
||||
// `timestamp` function must return timestamp for the last datapoint on the current window
|
||||
// in order to properly handle offset and timestamps unaligned to the current step.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details.
|
||||
"timestamp": newRollupFuncOneArg(rollupTimestamp),
|
||||
}
|
||||
|
||||
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
|
||||
@@ -447,15 +452,15 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu
|
||||
dstValues = decimal.ExtendFloat64sCapacity(dstValues, len(rc.Timestamps))
|
||||
|
||||
scrapeInterval := getScrapeInterval(timestamps)
|
||||
if *maxStalenessInterval > 0 {
|
||||
if si := maxStalenessInterval.Milliseconds(); scrapeInterval > si {
|
||||
scrapeInterval = si
|
||||
}
|
||||
}
|
||||
maxPrevInterval := getMaxPrevInterval(scrapeInterval)
|
||||
if rc.LookbackDelta > 0 && maxPrevInterval > rc.LookbackDelta {
|
||||
maxPrevInterval = rc.LookbackDelta
|
||||
}
|
||||
if *minStalenessInterval > 0 {
|
||||
if msi := minStalenessInterval.Milliseconds(); msi > 0 && maxPrevInterval < msi {
|
||||
maxPrevInterval = msi
|
||||
}
|
||||
}
|
||||
window := rc.Window
|
||||
if window <= 0 {
|
||||
window = rc.Step
|
||||
@@ -1509,6 +1514,19 @@ func rollupLow(rfa *rollupFuncArg) float64 {
|
||||
return min
|
||||
}
|
||||
|
||||
func rollupTimestamp(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
timestamps := rfa.timestamps
|
||||
if len(timestamps) == 0 {
|
||||
// Do not take into account rfa.prevTimestamp, since it may lead
|
||||
// to inconsistent results comparing to Prometheus on broken time series
|
||||
// with irregular data points.
|
||||
return nan
|
||||
}
|
||||
return float64(timestamps[len(timestamps)-1]) / 1e3
|
||||
}
|
||||
|
||||
func rollupFirst(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
|
||||
@@ -53,9 +53,9 @@ var transformFuncs = map[string]transformFunc{
|
||||
"sort_desc": newTransformFuncSort(true),
|
||||
"sqrt": newTransformFuncOneArg(transformSqrt),
|
||||
"time": transformTime,
|
||||
"timestamp": transformTimestamp,
|
||||
"vector": transformVector,
|
||||
"year": newTransformFuncDateTime(transformYear),
|
||||
// "timestamp" has been moved to rollup funcs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
|
||||
"vector": transformVector,
|
||||
"year": newTransformFuncDateTime(transformYear),
|
||||
|
||||
// New funcs
|
||||
"label_set": transformLabelSet,
|
||||
@@ -1516,25 +1516,6 @@ func transformTime(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
return evalTime(tfa.ec), nil
|
||||
}
|
||||
|
||||
func transformTimestamp(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rvs := args[0]
|
||||
for _, ts := range rvs {
|
||||
ts.MetricName.ResetMetricGroup()
|
||||
values := ts.Values
|
||||
for i, t := range ts.Timestamps {
|
||||
v := values[i]
|
||||
if !math.IsNaN(v) {
|
||||
values[i] = float64(t) / 1e3
|
||||
}
|
||||
}
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformVector(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
|
||||
@@ -120,6 +120,14 @@ func SearchTagEntries(maxTagKeys, maxTagValues int) ([]storage.TagEntry, error)
|
||||
return tagEntries, err
|
||||
}
|
||||
|
||||
// GetTSDBStatusForDate returns TSDB status for the given date.
|
||||
func GetTSDBStatusForDate(date uint64, topN int) (*storage.TSDBStatus, error) {
|
||||
WG.Add(1)
|
||||
status, err := Storage.GetTSDBStatusForDate(date, topN)
|
||||
WG.Done()
|
||||
return status, err
|
||||
}
|
||||
|
||||
// GetSeriesCount returns the number of time series in the storage.
|
||||
func GetSeriesCount() (uint64, error) {
|
||||
WG.Add(1)
|
||||
|
||||
@@ -1,16 +1,25 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
DOCKER_NAMESPACE := docker.io/victoriametrics
|
||||
BUILDER_IMAGE := local/builder:go1.14.1
|
||||
BASE_IMAGE := local/base:1.1.0
|
||||
DOCKER_NAMESPACE := victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= scratch
|
||||
CERTS_IMAGE := alpine:3.11
|
||||
GO_BUILDER_IMAGE := golang:1.14.2
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr : _)
|
||||
BASE_IMAGE := local/base:1.1.1-$(shell echo $(ROOT_IMAGE) | tr : _)-$(shell echo $(CERTS_IMAGE) | tr : _)
|
||||
|
||||
package-base:
|
||||
(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BASE_IMAGE)$$') \
|
||||
|| docker build -t $(BASE_IMAGE) deployment/docker/base
|
||||
|| docker build -t $(BASE_IMAGE) \
|
||||
--build-arg root_image=$(ROOT_IMAGE) \
|
||||
--build-arg certs_image=$(CERTS_IMAGE) \
|
||||
deployment/docker/base
|
||||
|
||||
package-builder:
|
||||
(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BUILDER_IMAGE)$$') \
|
||||
|| docker build -t $(BUILDER_IMAGE) deployment/docker/builder
|
||||
|| docker build -t $(BUILDER_IMAGE) \
|
||||
--build-arg go_builder_image=$(GO_BUILDER_IMAGE) \
|
||||
deployment/docker/builder
|
||||
|
||||
app-via-docker: package-base package-builder
|
||||
mkdir -p gocache-for-docker
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
FROM alpine:3.10 as base
|
||||
ARG certs_image
|
||||
ARG root_image
|
||||
FROM $certs_image as certs
|
||||
|
||||
RUN apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM scratch
|
||||
FROM $root_image
|
||||
|
||||
COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
root:x:0:root
|
||||
victoriametrics:x:1000:victoriametrics
|
||||
@@ -1,2 +0,0 @@
|
||||
root:x:0:0:root:/root:/bin/ash
|
||||
victoriametrics:x:1000:1000::/:
|
||||
@@ -1,2 +1,3 @@
|
||||
FROM golang:1.14.1
|
||||
ARG go_builder_image
|
||||
FROM $go_builder_image
|
||||
STOPSIGNAL SIGINT
|
||||
|
||||
@@ -2,7 +2,7 @@ version: '3.5'
|
||||
services:
|
||||
prometheus:
|
||||
container_name: prometheus
|
||||
image: prom/prometheus:v2.17.1
|
||||
image: prom/prometheus:v2.17.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -35,7 +35,7 @@ services:
|
||||
restart: always
|
||||
grafana:
|
||||
container_name: grafana
|
||||
image: grafana/grafana:6.7.1
|
||||
image: grafana/grafana:6.7.2
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
cd /var/lib/grafana &&
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
## Case studies and talks
|
||||
# Case studies and talks
|
||||
|
||||
Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
|
||||
and feel free to ask for references, reviews and additional case studies from real VictoriaMetrics users there.
|
||||
|
||||
### Adidas
|
||||
## Adidas
|
||||
|
||||
See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
|
||||
from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk at [PromCon 2019](https://promcon.io/2019-munich/).
|
||||
VictoriaMetrics is compared to Thanos, Cortex and M3DB in the talk.
|
||||
|
||||
|
||||
### COLOPL
|
||||
## COLOPL
|
||||
|
||||
[COLOPL](http://www.colopl.co.jp/en/) is a Japanese game development company. It started using VictoriaMetrics
|
||||
after evaluating the following remote storage solutions for Prometheus:
|
||||
@@ -24,7 +24,7 @@ See [slides](https://speakerdeck.com/inletorder/monitoring-platform-with-victori
|
||||
from `Large-scale, super-load system monitoring platform built with VictoriaMetrics` talk at [Prometheus Meetup Tokyo #3](https://prometheus.connpass.com/event/157721/).
|
||||
|
||||
|
||||
### Wix.com
|
||||
## Wix.com
|
||||
|
||||
[Wix.com](https://en.wikipedia.org/wiki/Wix.com) is the leading web development platform.
|
||||
|
||||
@@ -55,7 +55,7 @@ Numbers:
|
||||
Alex Ulstein, Head of Monitoring, Wix.com
|
||||
|
||||
|
||||
### Wedos.com
|
||||
## Wedos.com
|
||||
|
||||
> [Wedos](https://www.wedos.com/) is the Biggest Czech Hosting. We have our own private data center, that holds only our servers and technologies. The second data center, where the servers will be cooled in an oil bath, is being built. We started using [cluster VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md) to store Prometheus metrics from all our infrastructure after receiving positive references from our friends who successfully use VictoriaMetrics.
|
||||
|
||||
@@ -68,7 +68,7 @@ Numbers:
|
||||
> We like configuration simplicity and zero maintenance for VictoriaMetrics - once installed and forgot about it. It works out of the box without any issues.
|
||||
|
||||
|
||||
### Synthesio
|
||||
## Synthesio
|
||||
|
||||
[Synthesio](https://www.synthesio.com/) is the leading social intelligence tool for social media monitoring & social analytics.
|
||||
|
||||
@@ -85,7 +85,7 @@ Numbers:
|
||||
- Churn rate - 100 new time series per hour
|
||||
|
||||
|
||||
### MHI Vestas Offshore Wind
|
||||
## MHI Vestas Offshore Wind
|
||||
|
||||
The mission of [MHI Vestas Offshore Wind](http://www.mhivestasoffshore.com) is to co-develop offshore wind as an economically viable and sustainable energy resource to benefit future generations.
|
||||
|
||||
@@ -100,7 +100,7 @@ Numbers with current limited roll out:
|
||||
- Retention time: 3 years
|
||||
|
||||
|
||||
### Dreamteam
|
||||
## Dreamteam
|
||||
|
||||
[Dreamteam](https://dreamteam.gg/) successfully uses single-node VictoriaMetrics in multiple environments.
|
||||
|
||||
@@ -116,7 +116,7 @@ VictoriaMetrics in production environment runs on 2 M5 EC2 instances in "HA" mod
|
||||
as load balancer for reads.
|
||||
|
||||
|
||||
### Brandwatch
|
||||
## Brandwatch
|
||||
|
||||
[Brandwatch](https://www.brandwatch.com/) is the world's pioneering digital consumer intelligence suite,
|
||||
helping over 2,000 of the world's most admired brands and agencies to make insightful, data-driven business decisions.
|
||||
@@ -161,7 +161,7 @@ Query rates are insignificant as we have concentrated on data ingestion so far.
|
||||
Anders Bomberg, Monitoring and Infrastructure Team Lead, brandwatch.com
|
||||
|
||||
|
||||
### Adsterra
|
||||
## Adsterra
|
||||
|
||||
[Adsterra Network](https://adsterra.com) is a leading digital advertising company that offers
|
||||
performance-based solutions for advertisers and media partners worldwide.
|
||||
@@ -219,3 +219,46 @@ Setup:
|
||||
We have 2 single-node instances of VictoriaMetrics. The first instance collects and stores high-resolution metrics (10s scrape interval) for a month.
|
||||
The second instance collects and stores low-resolution metrics (300s scrape interval) for a month.
|
||||
We use Promxy + Alertmanager for global view and alerts evaluation.
|
||||
|
||||
|
||||
## ARNES
|
||||
|
||||
[The Academic and Research Network of Slovenia](https://www.arnes.si/en/) (ARNES) is a public institute that provides network services to research,
|
||||
educational and cultural organizations, and enables them to establish connections and cooperation with each other and with related organizations abroad.
|
||||
|
||||
After using Cacti, Graphite and StatsD for years, we wanted to upgrade our monitoring stack to something that:
|
||||
|
||||
- has native alerting support
|
||||
- can run on-prem
|
||||
- has multi-dimension metrics
|
||||
- lower hardware requirements
|
||||
- is scalable
|
||||
- simple client provisioning and discovery with Puppet
|
||||
|
||||
We were running Prometheus for about a year in a test environment and it worked great. But there was a need/wish for a few years of retention time,
|
||||
like the old systems provided. We tested Thanos, which was a bit resource hungry back then, but it worked great for about half a year
|
||||
until we discovered VictoriaMetrics. As our scale is not that big, we don't have on-prem S3 and no Kubernetes, VM's single node instance provided
|
||||
the same result with less maintenance overhead and lower hardware requirements.
|
||||
|
||||
After testing it a few months and having great support from the maintainers on [Slack](http://slack.victoriametrics.com/),
|
||||
we decided to go with it. VM's support for ingesting InfluxDB metrics was an additional bonus, since our hardware team uses
|
||||
SNMPCollector to collect metrics from network devices and switching from InfluxDB to VictoriaMetrics was a simple change in the config file for them.
|
||||
|
||||
Numbers:
|
||||
|
||||
- 2 single node instances per DC (one for prometheus and one for influxdb metrics)
|
||||
- Active time series per VictoriaMetrics instance: ~500k (prometheus) + ~320k (influxdb)
|
||||
- Ingestion rate per VictoriaMetrics instance: 45k/s (prometheus) / 30k/s (influxdb)
|
||||
- Query duration: median is ~5ms, 99th percentile is ~45ms
|
||||
- Total number of datapoints per instance: 390B (prometheus), 110B (influxdb)
|
||||
- Average datapoint size on drive: 0.4 bytes
|
||||
- Disk usage per VictoriaMetrics instance: 125GB (prometheus), 185GB (influxdb)
|
||||
- Index size per VictoriaMetrics instance: 1.6GB (prometheus), 1.2GB (influxdb)
|
||||
|
||||
We are running 1 Prometheus, 1 VictoriaMetrics and 1 Grafana server in each datacenter on baremetal servers, scraping 350+ targets
|
||||
(and 3k+ devices collected via SNMPCollector sending metrics directly to VM). Each Prometheus is scraping all targets,
|
||||
so we have all metrics in both VictoriaMetrics instances. We are using [Promxy](https://github.com/jacksontj/promxy) to deduplicate metrics from both instances.
|
||||
Grafana has a LB in front, so if one DC has problems, we can still view all metrics from both DCs on the other Grafana instance.
|
||||
|
||||
We are still in the process of migration, but we are really happy with the whole stack. It has proven as an essential piece
|
||||
for insight into our services during COVID-19 and has enabled us to provide better service and spot problems faster.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
<img alt="Victoria Metrics" src="logo.png">
|
||||
|
||||
# Cluster version
|
||||
|
||||
VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.
|
||||
@@ -89,7 +91,12 @@ Run `make package`. It will build the following docker images locally:
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.
|
||||
|
||||
By default images are built on top of `scratch` image. It is possible to build on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds images on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package
|
||||
```
|
||||
|
||||
## Operation
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ VictoriaMetrics is similar to Cortex in the following aspects:
|
||||
i.e. there is no need in running sidecars unlike in [Thanos](https://github.com/thanos-io/thanos) case.
|
||||
- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format).
|
||||
|
||||
The main differences between Corex and VictoriaMetrics:
|
||||
The main differences between Cortex and VictoriaMetrics:
|
||||
- Cortex re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
|
||||
- Cortex provides [Ruler](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ruler) and [Alertmanager](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#alertmanager) components,
|
||||
which are currently missing in VictoriaMetrics. However, these components can be substituted by [Promxy](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
[](https://hub.docker.com/r/victoriametrics/victoria-metrics)
|
||||
[](http://slack.victoriametrics.com/)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
|
||||
[](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
|
||||

|
||||
|
||||
## VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
|
||||
@@ -17,6 +27,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
* [MHI Vestas Offshore Wind](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#mhi-vestas-offshore-wind)
|
||||
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
|
||||
* [Brandwatch](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#brandwatch)
|
||||
* [Adsterra](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adsterra)
|
||||
* [ARNES](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#arnes)
|
||||
|
||||
|
||||
## Prominent features
|
||||
@@ -196,6 +208,10 @@ Read more about tuning remote write for Prometheus [here](https://prometheus.io/
|
||||
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
|
||||
since the previous versions may have issues with `remote_write`.
|
||||
|
||||
Take a look also at [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md),
|
||||
which can be used as faster and less resource-hungry alternative to Prometheus in certain cases.
|
||||
|
||||
|
||||
### Grafana setup
|
||||
|
||||
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
|
||||
@@ -243,6 +259,9 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
|
||||
|
||||
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
|
||||
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
|
||||
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
|
||||
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
|
||||
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
|
||||
|
||||
In the future other `*_sd_config` types will be supported.
|
||||
|
||||
@@ -260,7 +279,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
|
||||
|
||||
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
|
||||
|
||||
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
|
||||
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag
|
||||
and stream plain Influx line protocol data to the configured TCP and/or UDP addresses.
|
||||
|
||||
VictoriaMetrics maps Influx data using the following rules:
|
||||
|
||||
@@ -431,10 +451,10 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
|
||||
|
||||
* `<column_pos>` is the position of the CSV column (field). Column numbering starts from 1. The order of parsing rules may be arbitrary.
|
||||
* `<type>` describes the column type. Supported types are:
|
||||
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value. The metric name is read from the `<context>`.
|
||||
CSV line must have at least a single metric field.
|
||||
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value, which must be integer or floating-point number.
|
||||
The metric name is read from the `<context>`. CSV line must have at least a single metric field. Multiple metric fields per CSV line are OK.
|
||||
* `label` - the corresponding CSV column at `<column_pos>` contains label value. The label name is read from the `<context>`.
|
||||
CSV line may have arbitrary number of label fields. All these fields are attached to all the configured metrics.
|
||||
CSV line may have arbitrary number of label fields. All these labels are attached to all the configured metrics.
|
||||
* `time` - the corresponding CSV column at `<column_pos>` contains metric time. CSV line may contain either one or zero columns with time.
|
||||
If CSV line has no time, then the current time is used. The time is applied to all the configured metrics.
|
||||
The format of the time is configured via `<context>`. Supported time formats are:
|
||||
@@ -444,7 +464,7 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
|
||||
* `rfc3339` - timestamp in [RFC3339](https://tools.ietf.org/html/rfc3339) format, i.e. `2006-01-02T15:04:05Z`.
|
||||
* `custom:<layout>` - custom layout for the timestamp. The `<layout>` may contain arbitrary time layout according to [time.Parse rules in Go](https://golang.org/pkg/time/#Parse).
|
||||
|
||||
Each request to `/api/v1/import/csv` can contain arbitrary number of CSV lines.
|
||||
Each request to `/api/v1/import/csv` may contain arbitrary number of CSV lines.
|
||||
|
||||
Example for importing CSV data via `/api/v1/import/csv`:
|
||||
|
||||
@@ -479,6 +499,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h
|
||||
* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
|
||||
* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
|
||||
* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
|
||||
* [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats)
|
||||
|
||||
These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
|
||||
|
||||
@@ -545,6 +566,13 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
|
||||
```
|
||||
|
||||
### Start with docker-compose
|
||||
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
|
||||
@@ -589,11 +617,13 @@ Steps for restoring from a snapshot:
|
||||
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
|
||||
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
|
||||
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
|
||||
the deleted time series isn't freed instantly - it is freed during subsequent [background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
|
||||
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
|
||||
before actually deleting the metrics.
|
||||
|
||||
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
|
||||
|
||||
The delete API is intended mainly for the following cases:
|
||||
|
||||
* One-off deleting of accidentally written invalid (or undesired) time series.
|
||||
@@ -601,10 +631,11 @@ The delete API is intended mainly for the following cases:
|
||||
|
||||
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
|
||||
|
||||
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
|
||||
* Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
|
||||
This can be done with relabeling in [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
|
||||
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
|
||||
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
|
||||
time series occupy disk space until the next merge operation, which can never occur.
|
||||
* Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
|
||||
time series occupy disk space until the next merge operation, which may never occur when deleting too old data.
|
||||
|
||||
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
|
||||
|
||||
@@ -826,6 +857,7 @@ Consider setting the following command-line flags:
|
||||
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
|
||||
* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
|
||||
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
|
||||
* `-search.resetCacheAuthKey` for protecting `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
|
||||
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
|
||||
@@ -895,9 +927,22 @@ The most interesting metrics are:
|
||||
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
|
||||
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
|
||||
|
||||
If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag
|
||||
to value close to the maximum interval between samples.
|
||||
|
||||
* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag.
|
||||
It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes
|
||||
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
|
||||
|
||||
* Metrics and labels leading to high cardinality or high churn rate can be determined at `/api/v1/status/tsdb` page.
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
|
||||
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
|
||||
while `topN` equals to 10.
|
||||
|
||||
|
||||
### Backfilling
|
||||
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time.
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
|
||||
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
|
||||
|
||||
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
|
||||
@@ -936,7 +981,8 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
|
||||
See [these docs](https://github.com/netdata/netdata#integrations).
|
||||
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
|
||||
See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
|
||||
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Ansible role for installing single-node VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
|
||||
* [Ansible role for installing cluster VictoriaMetrics](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
|
||||
|
||||
## Third-party contributions
|
||||
|
||||
|
||||
BIN
docs/logo.png
Normal file
BIN
docs/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -131,12 +131,21 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
|
||||
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discovery.
|
||||
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
|
||||
* `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
|
||||
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
|
||||
* `ec2_sd_configs` - for scraping targets in Amazon EC2.
|
||||
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
|
||||
`vmagent` doesn't support `role_arn` config param yet.
|
||||
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
|
||||
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
|
||||
`vmagent` provides the following additional functionality for `gce_sd_config`:
|
||||
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
|
||||
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
|
||||
* if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
|
||||
* `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
|
||||
|
||||
The following service discovery mechanisms will be added to `vmagent` soon:
|
||||
|
||||
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
|
||||
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
|
||||
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
|
||||
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
|
||||
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
|
||||
|
||||
@@ -182,7 +191,7 @@ Read more about relabeling in the following articles:
|
||||
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
|
||||
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. This page also exports information on improperly configured scrape configs.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -190,11 +199,14 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
|
||||
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
|
||||
since `vmagent` establishes at least a single TCP connection per each target.
|
||||
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow big when remote storage is unvailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
|
||||
|
||||
|
||||
@@ -205,7 +217,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmagent` from the root folder of the repository.
|
||||
It builds `vmagent` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -220,3 +232,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmagent
|
||||
```
|
||||
|
||||
@@ -140,14 +140,28 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
-dst string
|
||||
Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-origin string
|
||||
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
|
||||
-snapshot.createURL string
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-snapshotName string
|
||||
Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
|
||||
-storageDataPath string
|
||||
@@ -164,7 +178,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmbackup` from the root folder of the repository.
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -179,3 +193,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmbackup
|
||||
```
|
||||
|
||||
@@ -47,12 +47,24 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-maxBytesPerSecond int
|
||||
The maximum download speed. There is no limit if it is set to 0
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
|
||||
-skipBackupCompleteCheck
|
||||
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-storageDataPath string
|
||||
@@ -69,7 +81,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmrestore` from the root folder of the repository.
|
||||
It builds `vmrestore` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -84,3 +96,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
|
||||
|
||||
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
|
||||
|
||||
```bash
|
||||
ROOT_IMAGE=alpine:3.11 make package-vmrestore
|
||||
```
|
||||
|
||||
27
go.mod
27
go.mod
@@ -1,26 +1,29 @@
|
||||
module github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.55.0 // indirect
|
||||
cloud.google.com/go v0.56.0 // indirect
|
||||
cloud.google.com/go/storage v1.6.0
|
||||
github.com/VictoriaMetrics/fastcache v1.5.7
|
||||
github.com/VictoriaMetrics/metrics v1.11.2
|
||||
github.com/aws/aws-sdk-go v1.29.34
|
||||
github.com/aws/aws-sdk-go v1.30.13
|
||||
github.com/cespare/xxhash/v2 v2.1.1
|
||||
github.com/golang/protobuf v1.4.0 // indirect
|
||||
github.com/golang/snappy v0.0.1
|
||||
github.com/jmespath/go-jmespath v0.3.0 // indirect
|
||||
github.com/klauspost/compress v1.10.3
|
||||
github.com/valyala/fasthttp v1.9.0
|
||||
github.com/valyala/fastjson v1.5.0
|
||||
github.com/klauspost/compress v1.10.5
|
||||
github.com/valyala/fasthttp v1.12.0
|
||||
github.com/valyala/fastjson v1.5.1
|
||||
github.com/valyala/fastrand v1.0.0
|
||||
github.com/valyala/gozstd v1.6.4
|
||||
github.com/valyala/gozstd v1.7.0
|
||||
github.com/valyala/histogram v1.0.1
|
||||
github.com/valyala/quicktemplate v1.4.1
|
||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect
|
||||
golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775
|
||||
golang.org/x/tools v0.0.0-20200330040139-fa3cc9eebcfe // indirect
|
||||
google.golang.org/api v0.20.0
|
||||
google.golang.org/genproto v0.0.0-20200330113809-af700f360a68 // indirect
|
||||
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
|
||||
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f
|
||||
golang.org/x/tools v0.0.0-20200423205358-59e73619c742 // indirect
|
||||
google.golang.org/api v0.22.0
|
||||
google.golang.org/appengine v1.6.6 // indirect
|
||||
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215 // indirect
|
||||
google.golang.org/grpc v1.29.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.2.8
|
||||
)
|
||||
|
||||
|
||||
72
go.sum
72
go.sum
@@ -9,8 +9,8 @@ cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6T
|
||||
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
|
||||
cloud.google.com/go v0.53.0 h1:MZQCQQaRwOrAcuKjiHWHrgKykt4fZyuwF2dtiG3fGW8=
|
||||
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
|
||||
cloud.google.com/go v0.55.0 h1:eoz/lYxKSL4CNAiaUJ0ZfD1J3bfMYbU5B3rwM1C1EIU=
|
||||
cloud.google.com/go v0.55.0/go.mod h1:ZHmoY+/lIMNkN2+fBmuTiqZ4inFhvQad8ft7MT8IV5Y=
|
||||
cloud.google.com/go v0.56.0 h1:WRz29PgAsVEyPSDHyk+0fpEkwEFyfhHn+JbksT6gIL4=
|
||||
cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
|
||||
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
|
||||
cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE=
|
||||
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
|
||||
@@ -40,8 +40,8 @@ github.com/VictoriaMetrics/metrics v1.11.2 h1:t/ceLP6SvagUqypCKU7cI7+tQn54+TIV/t
|
||||
github.com/VictoriaMetrics/metrics v1.11.2/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
|
||||
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
|
||||
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
|
||||
github.com/aws/aws-sdk-go v1.29.34 h1:yrzwfDaZFe9oT4AmQeNNunSQA7c0m2chz0B43+bJ1ok=
|
||||
github.com/aws/aws-sdk-go v1.29.34/go.mod h1:1KvfttTE3SPKMpo8g2c6jL3ZKfXtFvKscTgahTma5Xg=
|
||||
github.com/aws/aws-sdk-go v1.30.13 h1:fBDYaJzInlOHpoKFaTEze5MvZ/pw7mhYkzDE8HAmD74=
|
||||
github.com/aws/aws-sdk-go v1.30.13/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
@@ -81,6 +81,12 @@ github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I
|
||||
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
|
||||
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
|
||||
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
||||
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
||||
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
||||
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
||||
github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ=
|
||||
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
||||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
@@ -105,8 +111,6 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
|
||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
|
||||
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
|
||||
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
|
||||
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
@@ -115,12 +119,12 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8=
|
||||
github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/compress v1.10.4 h1:jFzIFaf586tquEB5EhzQG0HwGNSlgAJpG53G6Ss11wc=
|
||||
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc=
|
||||
github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
@@ -140,20 +144,21 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s=
|
||||
github.com/valyala/fasthttp v1.9.0 h1:hNpmUdy/+ZXYpGy0OBfm7K0UQTzb73W0T0U4iJIVrMw=
|
||||
github.com/valyala/fasthttp v1.9.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
|
||||
github.com/valyala/fastjson v1.5.0 h1:DGrb4wEYso2HdGLyLmNoyNCQnCWfjd8yhghPv5/5YQg=
|
||||
github.com/valyala/fastjson v1.5.0/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
|
||||
github.com/valyala/fasthttp v1.12.0 h1:TsB9qkSeiMXB40ELWWSRMjlsE+8IkqXHcs01y2d9aw0=
|
||||
github.com/valyala/fasthttp v1.12.0/go.mod h1:229t1eWu9UXTPmoUkbpN/fctKPBY4IJoFXQnxHGXy6E=
|
||||
github.com/valyala/fastjson v1.5.1 h1:SXaQZVSwLjZOVhDEhjiCcDtnX0Feu7Z7A1+C5atpoHM=
|
||||
github.com/valyala/fastjson v1.5.1/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
|
||||
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
|
||||
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
|
||||
github.com/valyala/gozstd v1.6.4 h1:nFLddjEf90SFl5cVWyElSHozQDsbvLljPK703/skBS0=
|
||||
github.com/valyala/gozstd v1.6.4/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
|
||||
github.com/valyala/gozstd v1.7.0 h1:Ljh5c9zboqLhwTI33al32R72iCZfn0mCbVGcFWbGwRQ=
|
||||
github.com/valyala/gozstd v1.7.0/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
|
||||
github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
|
||||
github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
|
||||
github.com/valyala/quicktemplate v1.4.1 h1:tEtkSN6mTCJlYVT7As5x4wjtkk2hj2thsb0M+AcAVeM=
|
||||
github.com/valyala/quicktemplate v1.4.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
|
||||
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
|
||||
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
||||
go.opencensus.io v0.22.2 h1:75k/FF0Q2YM8QYo07VPddOLBslDt1MZOdEslOHvmzAs=
|
||||
@@ -212,7 +217,6 @@ golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
|
||||
@@ -220,10 +224,10 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL
|
||||
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
|
||||
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
|
||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd h1:QPwSajcTUrFriMF1nJ3XzgoqakqQEsnZf9LdXdi2nkI=
|
||||
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
@@ -258,10 +262,10 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 h1:sfkvUWPNGwSV+8/fNqctR5lS2
|
||||
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200317113312-5766fd39f98d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775 h1:TC0v2RSO1u2kn1ZugjrFXkRZAEaqMN/RW+OTZkBzmLE=
|
||||
golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
|
||||
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
@@ -299,9 +303,9 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200317043434-63da46f3035e/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
|
||||
golang.org/x/tools v0.0.0-20200330040139-fa3cc9eebcfe h1:sOd+hT8wBUrIFR5Q6uQb/rg50z8NjHk96kC4adwvxjw=
|
||||
golang.org/x/tools v0.0.0-20200330040139-fa3cc9eebcfe/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
|
||||
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
|
||||
golang.org/x/tools v0.0.0-20200423205358-59e73619c742 h1:9OGWpORUXvk8AsaBJlpzzDx7Srv/rSK6rvjcsJq4rJo=
|
||||
golang.org/x/tools v0.0.0-20200423205358-59e73619c742/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
@@ -319,12 +323,16 @@ google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/
|
||||
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.20.0 h1:jz2KixHX7EcCPiQrySzPdnYT7DbINAypCqKZ1Z7GM40=
|
||||
google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.22.0 h1:J1Pl9P2lnmYFSJvgs70DKELqHNh8CNWXPbud4njEE2s=
|
||||
google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
|
||||
google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc=
|
||||
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
|
||||
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
|
||||
@@ -343,9 +351,9 @@ google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4
|
||||
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce h1:1mbrb1tUU+Zmt5C94IGKADBTJZjZXAd+BubWi7r9EiI=
|
||||
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200317114155-1f3552e48f24/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200330113809-af700f360a68 h1:ay2fio+sR6N1ccqZQgr/bUoo6pwgbxU8imlLkQc9Nlo=
|
||||
google.golang.org/genproto v0.0.0-20200330113809-af700f360a68/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215 h1:0Uz5jLJQioKgVozXa1gzGbzYxbb/rhQEVvSWxzw5oUs=
|
||||
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
|
||||
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
|
||||
@@ -358,6 +366,14 @@ google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk=
|
||||
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.28.0 h1:bO/TA4OxCOummhSf10siHuG7vJOiwh7SpRpFZDkOgl4=
|
||||
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
|
||||
google.golang.org/grpc v1.29.1 h1:EC2SB8S04d2r73uptxphDSUG+kTKVgjRPF+N3xpxRB4=
|
||||
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
||||
google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zimw=
|
||||
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
||||
@@ -164,7 +164,7 @@ func TestMaxUpExponent(t *testing.T) {
|
||||
|
||||
e := maxUpExponent(v)
|
||||
if e != eExpected {
|
||||
t.Fatalf("unexpected e for v=%d; got %d; epxecting %d", v, e, eExpected)
|
||||
t.Fatalf("unexpected e for v=%d; got %d; expecting %d", v, e, eExpected)
|
||||
}
|
||||
e = maxUpExponent(-v)
|
||||
if e != eExpected {
|
||||
|
||||
@@ -70,9 +70,9 @@ func TestMarshalInt64ArraySize(t *testing.T) {
|
||||
v += 30e3 + int64(rand.NormFloat64()*1e3)
|
||||
}
|
||||
|
||||
testMarshalInt64ArraySize(t, va, 1, 500, 1300)
|
||||
testMarshalInt64ArraySize(t, va, 2, 500, 1400)
|
||||
testMarshalInt64ArraySize(t, va, 3, 800, 1800)
|
||||
testMarshalInt64ArraySize(t, va, 1, 180, 1400)
|
||||
testMarshalInt64ArraySize(t, va, 2, 250, 1550)
|
||||
testMarshalInt64ArraySize(t, va, 3, 600, 1800)
|
||||
testMarshalInt64ArraySize(t, va, 4, 1300, 2100)
|
||||
testMarshalInt64ArraySize(t, va, 5, 2000, 3200)
|
||||
testMarshalInt64ArraySize(t, va, 6, 3000, 4800)
|
||||
|
||||
@@ -214,6 +214,6 @@ func testMarshalInt64ArraySize(t *testing.T, va []int64, precisionBits uint8, mi
|
||||
t.Fatalf("too big size for marshaled %d items with precisionBits %d: got %d; expecting %d", len(va), precisionBits, len(b), maxSizeExpected)
|
||||
}
|
||||
if len(b) < minSizeExpected {
|
||||
t.Fatalf("too small size for marshaled %d items with precisionBits %d: got %d; epxecting %d", len(va), precisionBits, len(b), minSizeExpected)
|
||||
t.Fatalf("too small size for marshaled %d items with precisionBits %d: got %d; expecting %d", len(va), precisionBits, len(b), minSizeExpected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"os"
|
||||
)
|
||||
|
||||
// fadviseSequentialRead is a placeholder implementation: it ignores both f and prefetch
// and always returns nil, so no read-ahead hint is passed to the OS.
func fadviseSequentialRead(f *os.File, prefetch bool) error {
|
||||
// TODO: implement this properly
|
||||
return nil
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
// +build linux freebsd
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func fadviseSequentialRead(f *os.File, prefetch bool) error {
|
||||
fd := int(f.Fd())
|
||||
mode := unix.FADV_SEQUENTIAL
|
||||
if prefetch {
|
||||
mode |= unix.FADV_WILLNEED
|
||||
}
|
||||
if err := unix.Fadvise(int(fd), 0, 0, mode); err != nil {
|
||||
return fmt.Errorf("error returned from unix.Fadvise(%d): %s", mode, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -67,15 +67,6 @@ func (r *ReaderAt) MustClose() {
|
||||
readersCount.Dec()
|
||||
}
|
||||
|
||||
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
|
||||
//
|
||||
// if prefetch is set, then the OS is hinted to prefetch f data.
|
||||
func (r *ReaderAt) MustFadviseSequentialRead(prefetch bool) {
|
||||
if err := fadviseSequentialRead(r.f, prefetch); err != nil {
|
||||
logger.Panicf("FATAL: error in fadviseSequentialRead(%q, %v): %s", r.f.Name(), prefetch, err)
|
||||
}
|
||||
}
|
||||
|
||||
// OpenReaderAt opens ReaderAt for reading from filename.
|
||||
//
|
||||
// MustClose must be called on the returned ReaderAt when it is no longer needed.
|
||||
@@ -94,7 +85,6 @@ func OpenReaderAt(path string) (*ReaderAt, error) {
|
||||
}
|
||||
r.mmapData = data
|
||||
}
|
||||
r.MustFadviseSequentialRead(false)
|
||||
readersCount.Inc()
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
@@ -146,7 +146,7 @@ func gzipHandler(rh RequestHandler) http.HandlerFunc {
|
||||
handlerWrapper(w, r, rh)
|
||||
if zrw, ok := w.(*gzipResponseWriter); ok {
|
||||
if err := zrw.Close(); err != nil && !isTrivialNetworkError(err) {
|
||||
logger.Errorf("gzipResponseWriter.Close: %s", err)
|
||||
logger.Warnf("gzipResponseWriter.Close: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -329,10 +329,10 @@ func (zrw *gzipResponseWriter) WriteHeader(statusCode int) {
|
||||
// Implements http.Flusher
|
||||
func (zrw *gzipResponseWriter) Flush() {
|
||||
if err := zrw.bw.Flush(); err != nil && !isTrivialNetworkError(err) {
|
||||
logger.Errorf("gzipResponseWriter.Flush (buffer): %s", err)
|
||||
logger.Warnf("gzipResponseWriter.Flush (buffer): %s", err)
|
||||
}
|
||||
if err := zrw.zw.Flush(); err != nil && !isTrivialNetworkError(err) {
|
||||
logger.Errorf("gzipResponseWriter.Flush (gzip): %s", err)
|
||||
logger.Warnf("gzipResponseWriter.Flush (gzip): %s", err)
|
||||
}
|
||||
if fw, ok := zrw.ResponseWriter.(http.Flusher); ok {
|
||||
fw.Flush()
|
||||
@@ -419,7 +419,7 @@ var (
|
||||
// Errorf writes formatted error message to w and to logger.
|
||||
func Errorf(w http.ResponseWriter, format string, args ...interface{}) {
|
||||
errStr := fmt.Sprintf(format, args...)
|
||||
logger.ErrorfSkipframes(1, "%s", errStr)
|
||||
logger.WarnfSkipframes(1, "%s", errStr)
|
||||
|
||||
// Extract statusCode from args
|
||||
statusCode := http.StatusBadRequest
|
||||
|
||||
@@ -18,7 +18,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC")
|
||||
loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC")
|
||||
loggerFormat = flag.String("loggerFormat", "default", "Format for logs. Possible values: default, json")
|
||||
loggerOutput = flag.String("loggerOutput", "stderr", "Output for the logs. Supported values: stderr, stdout")
|
||||
)
|
||||
@@ -51,10 +51,10 @@ var output io.Writer = os.Stderr
|
||||
|
||||
func validateLoggerLevel() {
|
||||
switch *loggerLevel {
|
||||
case "INFO", "ERROR", "FATAL", "PANIC":
|
||||
case "INFO", "WARN", "ERROR", "FATAL", "PANIC":
|
||||
default:
|
||||
// We cannot use logger.Panicf here, since the logger isn't initialized yet.
|
||||
panic(fmt.Errorf("FATAL: unsupported `-loggerLevel` value: %q; supported values are: INFO, ERROR, FATAL, PANIC", *loggerLevel))
|
||||
panic(fmt.Errorf("FATAL: unsupported `-loggerLevel` value: %q; supported values are: INFO, WARN, ERROR, FATAL, PANIC", *loggerLevel))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,11 +79,21 @@ func Infof(format string, args ...interface{}) {
|
||||
logLevel("INFO", format, args...)
|
||||
}
|
||||
|
||||
// Warnf logs warn message.
//
// format and args are passed to logLevel together with the "WARN" level.
|
||||
func Warnf(format string, args ...interface{}) {
|
||||
logLevel("WARN", format, args...)
|
||||
}
|
||||
|
||||
// Errorf logs error message.
//
// format and args are passed to logLevel together with the "ERROR" level.
|
||||
func Errorf(format string, args ...interface{}) {
|
||||
logLevel("ERROR", format, args...)
|
||||
}
|
||||
|
||||
// WarnfSkipframes logs warn message and skips the given number of frames for the caller.
//
// skipframes, format and args are passed to logLevelSkipframes together with the "WARN" level.
|
||||
func WarnfSkipframes(skipframes int, format string, args ...interface{}) {
|
||||
logLevelSkipframes(skipframes, "WARN", format, args...)
|
||||
}
|
||||
|
||||
// ErrorfSkipframes logs error message and skips the given number of frames for the caller.
|
||||
func ErrorfSkipframes(skipframes int, format string, args ...interface{}) {
|
||||
logLevelSkipframes(skipframes, "ERROR", format, args...)
|
||||
@@ -185,6 +195,13 @@ var mu sync.Mutex
|
||||
|
||||
func shouldSkipLog(level string) bool {
|
||||
switch *loggerLevel {
|
||||
case "WARN":
|
||||
switch level {
|
||||
case "WARN", "ERROR", "FATAL", "PANIC":
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
case "ERROR":
|
||||
switch level {
|
||||
case "ERROR", "FATAL", "PANIC":
|
||||
|
||||
@@ -8,7 +8,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy")
|
||||
var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy. "+
|
||||
"Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. "+
|
||||
"Too high value may evict too much data from OS page cache, which will result in higher disk IO usage")
|
||||
|
||||
var (
|
||||
allowedMemory int
|
||||
|
||||
@@ -204,8 +204,6 @@ func (idxbc *indexBlockCache) MustClose() {
|
||||
close(idxbc.cleanerStopCh)
|
||||
idxbc.cleanerWG.Wait()
|
||||
|
||||
atomic.AddUint64(&indexBlockCacheRequests, idxbc.requests)
|
||||
atomic.AddUint64(&indexBlockCacheMisses, idxbc.misses)
|
||||
// It is safe returning idxbc.m to pool, since the Reset must be called
|
||||
// when the idxbc entries are no longer accessed by concurrent goroutines.
|
||||
for _, idxbe := range idxbc.m {
|
||||
@@ -240,11 +238,6 @@ func (idxbc *indexBlockCache) cleanByTimeout() {
|
||||
idxbc.mu.Unlock()
|
||||
}
|
||||
|
||||
var (
|
||||
indexBlockCacheRequests uint64
|
||||
indexBlockCacheMisses uint64
|
||||
)
|
||||
|
||||
func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
|
||||
atomic.AddUint64(&idxbc.requests, 1)
|
||||
idxbc.mu.RLock()
|
||||
@@ -361,8 +354,6 @@ func (ibc *inmemoryBlockCache) MustClose() {
|
||||
close(ibc.cleanerStopCh)
|
||||
ibc.cleanerWG.Wait()
|
||||
|
||||
atomic.AddUint64(&inmemoryBlockCacheRequests, ibc.requests)
|
||||
atomic.AddUint64(&inmemoryBlockCacheMisses, ibc.misses)
|
||||
// It is safe returning ibc.m entries to pool, since the Reset function may be called
|
||||
// only if no other goroutines access ibc entries.
|
||||
for _, ibe := range ibc.m {
|
||||
@@ -397,11 +388,6 @@ func (ibc *inmemoryBlockCache) cleanByTimeout() {
|
||||
ibc.mu.Unlock()
|
||||
}
|
||||
|
||||
var (
|
||||
inmemoryBlockCacheRequests uint64
|
||||
inmemoryBlockCacheMisses uint64
|
||||
)
|
||||
|
||||
func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
|
||||
atomic.AddUint64(&ibc.requests, 1)
|
||||
|
||||
|
||||
@@ -19,6 +19,16 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
|
||||
)
|
||||
|
||||
// These are global counters for cache requests and misses for parts
|
||||
// which were already merged into other parts.
|
||||
var (
|
||||
historicalDataBlockCacheRequests uint64
|
||||
historicalDataBlockCacheMisses uint64
|
||||
|
||||
historicalIndexBlockCacheRequests uint64
|
||||
historicalIndexBlockCacheMisses uint64
|
||||
)
|
||||
|
||||
// maxParts is the maximum number of parts in the table.
|
||||
//
|
||||
// This number may be reached when the insertion pace outreaches merger pace.
|
||||
@@ -326,11 +336,11 @@ func (tb *Table) UpdateMetrics(m *TableMetrics) {
|
||||
}
|
||||
tb.partsLock.Unlock()
|
||||
|
||||
atomic.AddUint64(&m.DataBlocksCacheRequests, atomic.LoadUint64(&inmemoryBlockCacheRequests))
|
||||
atomic.AddUint64(&m.DataBlocksCacheMisses, atomic.LoadUint64(&inmemoryBlockCacheMisses))
|
||||
m.DataBlocksCacheRequests += atomic.LoadUint64(&historicalDataBlockCacheRequests)
|
||||
m.DataBlocksCacheMisses += atomic.LoadUint64(&historicalDataBlockCacheMisses)
|
||||
|
||||
atomic.AddUint64(&m.IndexBlocksCacheRequests, atomic.LoadUint64(&indexBlockCacheRequests))
|
||||
atomic.AddUint64(&m.IndexBlocksCacheMisses, atomic.LoadUint64(&indexBlockCacheMisses))
|
||||
m.IndexBlocksCacheRequests += atomic.LoadUint64(&historicalIndexBlockCacheRequests)
|
||||
m.IndexBlocksCacheMisses += atomic.LoadUint64(&historicalIndexBlockCacheMisses)
|
||||
}
|
||||
|
||||
// AddItems adds the given items to the tb.
|
||||
@@ -1300,6 +1310,10 @@ func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]bool) ([]*pa
|
||||
dst := pws[:0]
|
||||
for _, pw := range pws {
|
||||
if partsToRemove[pw] {
|
||||
atomic.AddUint64(&historicalDataBlockCacheRequests, pw.p.ibCache.Requests())
|
||||
atomic.AddUint64(&historicalDataBlockCacheMisses, pw.p.ibCache.Misses())
|
||||
atomic.AddUint64(&historicalIndexBlockCacheRequests, pw.p.idxbCache.Requests())
|
||||
atomic.AddUint64(&historicalIndexBlockCacheMisses, pw.p.idxbCache.Misses())
|
||||
removedParts++
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -57,6 +57,10 @@ var rollupFuncs = map[string]bool{
|
||||
"aggr_over_time": true,
|
||||
"hoeffding_bound_upper": true,
|
||||
"hoeffding_bound_lower": true,
|
||||
|
||||
// `timestamp` func has been moved here because it must work properly with offsets and samples unaligned to the current step.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details.
|
||||
"timestamp": true,
|
||||
}
|
||||
|
||||
// IsRollupFunc returns whether funcName is known rollup function.
|
||||
|
||||
@@ -32,9 +32,9 @@ var transformFuncs = map[string]bool{
|
||||
"sort_desc": true,
|
||||
"sqrt": true,
|
||||
"time": true,
|
||||
"timestamp": true,
|
||||
"vector": true,
|
||||
"year": true,
|
||||
// "timestamp" has been moved to rollup funcs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
|
||||
"vector": true,
|
||||
"year": true,
|
||||
|
||||
// New funcs from MetricsQL
|
||||
"label_set": true,
|
||||
|
||||
135
lib/promauth/config.go
Normal file
135
lib/promauth/config.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package promauth
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
)
|
||||
|
||||
// TLSConfig represents TLS config.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config
//
// NewConfig converts it into the TLS-related fields of Config.
|
||||
type TLSConfig struct {
|
||||
// CAFile is the path to a PEM file with CA certificates.
// NewConfig loads it into Config.TLSRootCA.
CAFile string `yaml:"ca_file"`
|
||||
// CertFile is the path to the certificate, which NewConfig loads
// together with KeyFile via tls.LoadX509KeyPair.
CertFile string `yaml:"cert_file"`
|
||||
// KeyFile is the path to the private key for the certificate from CertFile.
KeyFile string `yaml:"key_file"`
|
||||
// ServerName is copied by NewConfig into Config.TLSServerName.
ServerName string `yaml:"server_name"`
|
||||
// InsecureSkipVerify is copied by NewConfig into Config.TLSInsecureSkipVerify.
InsecureSkipVerify bool `yaml:"insecure_skip_verify"`
|
||||
}
|
||||
|
||||
// BasicAuthConfig represents basic auth config.
//
// NewConfig turns it into a `Basic ...` value for Config.Authorization.
|
||||
type BasicAuthConfig struct {
|
||||
// Username is mandatory: NewConfig returns an error if it is empty.
Username string `yaml:"username"`
|
||||
// Password is the password itself. NewConfig rejects configs
// where both Password and PasswordFile are set.
Password string `yaml:"password"`
|
||||
// PasswordFile is the path to a file NewConfig reads the password from.
PasswordFile string `yaml:"password_file"`
|
||||
}
|
||||
|
||||
// Config is auth config.
//
// It is built by NewConfig and holds already loaded auth data:
// the `Authorization` header value and the TLS settings consumed by NewTLSConfig.
|
||||
type Config struct {
|
||||
// Optional `Authorization` header.
|
||||
//
|
||||
// It may contain `Basic ....` or `Bearer ....` string.
|
||||
Authorization string
|
||||
|
||||
// Optional TLS config
//
// TLSRootCA and TLSCertificate are nil when NewConfig wasn't given
// the corresponding files.
|
||||
TLSRootCA *x509.CertPool
|
||||
TLSCertificate *tls.Certificate
|
||||
TLSServerName string
|
||||
TLSInsecureSkipVerify bool
|
||||
}
|
||||
|
||||
// NewTLSConfig returns new TLS config for the given ac.
|
||||
func (ac *Config) NewTLSConfig() *tls.Config {
|
||||
tlsCfg := &tls.Config{
|
||||
RootCAs: ac.TLSRootCA,
|
||||
ClientSessionCache: tls.NewLRUClientSessionCache(0),
|
||||
}
|
||||
if ac.TLSCertificate != nil {
|
||||
tlsCfg.Certificates = []tls.Certificate{*ac.TLSCertificate}
|
||||
}
|
||||
tlsCfg.ServerName = ac.TLSServerName
|
||||
tlsCfg.InsecureSkipVerify = ac.TLSInsecureSkipVerify
|
||||
return tlsCfg
|
||||
}
|
||||
|
||||
// NewConfig creates auth config from the given args.
|
||||
func NewConfig(baseDir string, basicAuth *BasicAuthConfig, bearerToken, bearerTokenFile string, tlsConfig *TLSConfig) (*Config, error) {
|
||||
var authorization string
|
||||
if basicAuth != nil {
|
||||
if basicAuth.Username == "" {
|
||||
return nil, fmt.Errorf("missing `username` in `basic_auth` section")
|
||||
}
|
||||
username := basicAuth.Username
|
||||
password := basicAuth.Password
|
||||
if basicAuth.PasswordFile != "" {
|
||||
if basicAuth.Password != "" {
|
||||
return nil, fmt.Errorf("both `password`=%q and `password_file`=%q are set in `basic_auth` section", basicAuth.Password, basicAuth.PasswordFile)
|
||||
}
|
||||
path := getFilepath(baseDir, basicAuth.PasswordFile)
|
||||
pass, err := readPasswordFromFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read password from `password_file`=%q set in `basic_auth` section: %s", basicAuth.PasswordFile, err)
|
||||
}
|
||||
password = pass
|
||||
}
|
||||
// See https://en.wikipedia.org/wiki/Basic_access_authentication
|
||||
token := username + ":" + password
|
||||
token64 := base64.StdEncoding.EncodeToString([]byte(token))
|
||||
authorization = "Basic " + token64
|
||||
}
|
||||
if bearerTokenFile != "" {
|
||||
if bearerToken != "" {
|
||||
return nil, fmt.Errorf("both `bearer_token`=%q and `bearer_token_file`=%q are set", bearerToken, bearerTokenFile)
|
||||
}
|
||||
path := getFilepath(baseDir, bearerTokenFile)
|
||||
token, err := readPasswordFromFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read bearer token from `bearer_token_file`=%q: %s", bearerTokenFile, err)
|
||||
}
|
||||
bearerToken = token
|
||||
}
|
||||
if bearerToken != "" {
|
||||
if authorization != "" {
|
||||
return nil, fmt.Errorf("cannot use both `basic_auth` and `bearer_token`")
|
||||
}
|
||||
authorization = "Bearer " + bearerToken
|
||||
}
|
||||
var tlsRootCA *x509.CertPool
|
||||
var tlsCertificate *tls.Certificate
|
||||
tlsServerName := ""
|
||||
tlsInsecureSkipVerify := false
|
||||
if tlsConfig != nil {
|
||||
tlsServerName = tlsConfig.ServerName
|
||||
tlsInsecureSkipVerify = tlsConfig.InsecureSkipVerify
|
||||
if tlsConfig.CertFile != "" || tlsConfig.KeyFile != "" {
|
||||
certPath := getFilepath(baseDir, tlsConfig.CertFile)
|
||||
keyPath := getFilepath(baseDir, tlsConfig.KeyFile)
|
||||
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %s", tlsConfig.CertFile, tlsConfig.KeyFile, err)
|
||||
}
|
||||
tlsCertificate = &cert
|
||||
}
|
||||
if tlsConfig.CAFile != "" {
|
||||
path := getFilepath(baseDir, tlsConfig.CAFile)
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read `ca_file` %q: %s", tlsConfig.CAFile, err)
|
||||
}
|
||||
tlsRootCA = x509.NewCertPool()
|
||||
if !tlsRootCA.AppendCertsFromPEM(data) {
|
||||
return nil, fmt.Errorf("cannot parse data from `ca_file` %q", tlsConfig.CAFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
ac := &Config{
|
||||
Authorization: authorization,
|
||||
TLSRootCA: tlsRootCA,
|
||||
TLSCertificate: tlsCertificate,
|
||||
TLSServerName: tlsServerName,
|
||||
TLSInsecureSkipVerify: tlsInsecureSkipVerify,
|
||||
}
|
||||
return ac, nil
|
||||
}
|
||||
24
lib/promauth/util.go
Normal file
24
lib/promauth/util.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package promauth
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// getFilepath returns path as is if it is absolute.
// Otherwise it returns path joined to baseDir.
func getFilepath(baseDir, path string) string {
	if !filepath.IsAbs(path) {
		path = filepath.Join(baseDir, path)
	}
	return path
}
|
||||
|
||||
// readPasswordFromFile returns the contents of the file at path
// with trailing whitespace (as defined by unicode.IsSpace) removed.
//
// Leading whitespace is preserved. The read error is returned unchanged.
func readPasswordFromFile(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	return strings.TrimRightFunc(string(raw), unicode.IsSpace), nil
}
|
||||
@@ -69,6 +69,20 @@ func removeEmptyLabels(labels []prompbmarshal.Label, labelsOffset int) []prompbm
|
||||
return dst
|
||||
}
|
||||
|
||||
// RemoveMetaLabels removes all the `__meta_` labels from src and puts the rest of labels to dst.
|
||||
//
|
||||
// See https://www.robustperception.io/life-of-a-label fo details.
|
||||
func RemoveMetaLabels(dst, src []prompbmarshal.Label) []prompbmarshal.Label {
|
||||
for i := range src {
|
||||
label := &src[i]
|
||||
if strings.HasPrefix(label.Name, "__meta_") {
|
||||
continue
|
||||
}
|
||||
dst = append(dst, *label)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// FinalizeLabels finalizes labels according to relabel_config rules.
|
||||
//
|
||||
// It renames `__address__` to `instance` and removes labels with "__" in the beginning.
|
||||
@@ -266,3 +280,14 @@ func GetLabelByName(labels []prompbmarshal.Label, name string) *prompbmarshal.La
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetLabelValueByName returns value for label with the given name from labels.
|
||||
//
|
||||
// It returns empty string for non-existing label.
|
||||
func GetLabelValueByName(labels []prompbmarshal.Label, name string) string {
|
||||
label := GetLabelByName(labels, name)
|
||||
if label == nil {
|
||||
return ""
|
||||
}
|
||||
return label.Value
|
||||
}
|
||||
|
||||
@@ -628,3 +628,50 @@ func TestFinalizeLabels(t *testing.T) {
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestRemoveMetaLabels(t *testing.T) {
|
||||
f := func(labels, resultExpected []prompbmarshal.Label) {
|
||||
t.Helper()
|
||||
result := RemoveMetaLabels(nil, labels)
|
||||
if !reflect.DeepEqual(result, resultExpected) {
|
||||
t.Fatalf("unexpected result of RemoveMetaLabels;\ngot\n%v\nwant\n%v", result, resultExpected)
|
||||
}
|
||||
}
|
||||
f(nil, nil)
|
||||
f([]prompbmarshal.Label{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
}, []prompbmarshal.Label{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
})
|
||||
f([]prompbmarshal.Label{
|
||||
{
|
||||
Name: "__meta_foo",
|
||||
Value: "bar",
|
||||
},
|
||||
}, nil)
|
||||
f([]prompbmarshal.Label{
|
||||
{
|
||||
Name: "__meta_foo",
|
||||
Value: "bdffr",
|
||||
},
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
{
|
||||
Name: "__meta_xxx",
|
||||
Value: "basd",
|
||||
},
|
||||
}, []prompbmarshal.Label{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ func newClient(sw *ScrapeWork) *client {
|
||||
isTLS := string(u.Scheme()) == "https"
|
||||
var tlsCfg *tls.Config
|
||||
if isTLS {
|
||||
tlsCfg = getTLSConfig(sw)
|
||||
tlsCfg = sw.AuthConfig.NewTLSConfig()
|
||||
}
|
||||
if !strings.Contains(host, ":") {
|
||||
if !isTLS {
|
||||
@@ -64,7 +64,7 @@ func newClient(sw *ScrapeWork) *client {
|
||||
scrapeURL: sw.ScrapeURL,
|
||||
host: host,
|
||||
requestURI: requestURI,
|
||||
authHeader: sw.Authorization,
|
||||
authHeader: sw.AuthConfig.Authorization,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,8 +79,8 @@ func (c *client) ReadData(dst []byte) ([]byte, error) {
|
||||
req.Header.Set("Authorization", c.authHeader)
|
||||
}
|
||||
resp := fasthttp.AcquireResponse()
|
||||
// There is no need in calling DoTimeout, since the timeout is already set in c.hc.ReadTimeout.
|
||||
err := c.hc.Do(req, resp)
|
||||
err := doRequestWithPossibleRetry(c.hc, req, resp)
|
||||
|
||||
fasthttp.ReleaseRequest(req)
|
||||
if err != nil {
|
||||
fasthttp.ReleaseResponse(resp)
|
||||
@@ -121,15 +121,15 @@ var (
|
||||
scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
|
||||
)
|
||||
|
||||
func getTLSConfig(sw *ScrapeWork) *tls.Config {
|
||||
tlsCfg := &tls.Config{
|
||||
RootCAs: sw.TLSRootCA,
|
||||
ClientSessionCache: tls.NewLRUClientSessionCache(0),
|
||||
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
|
||||
// There is no need to call DoTimeout, since the timeout must already be set in hc.ReadTimeout.
|
||||
err := hc.Do(req, resp)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if sw.TLSCertificate != nil {
|
||||
tlsCfg.Certificates = []tls.Certificate{*sw.TLSCertificate}
|
||||
if err != fasthttp.ErrConnectionClosed {
|
||||
return err
|
||||
}
|
||||
tlsCfg.ServerName = sw.TLSServerName
|
||||
tlsCfg.InsecureSkipVerify = sw.TLSInsecureSkipVerify
|
||||
return tlsCfg
|
||||
// Retry request if the server closed the keep-alive connection during the first attempt.
|
||||
return hc.Do(req, resp)
|
||||
}
|
||||
|
||||
@@ -1,23 +1,30 @@
|
||||
package promscrape
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/ec2"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/gce"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/kubernetes"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var (
|
||||
strictParse = flag.Bool("promscrape.config.strictParse", false, "Whether to allow only supported fields in '-promscrape.config'. "+
|
||||
"This option may be used for errors detection in '-promscrape.config' file")
|
||||
)
|
||||
|
||||
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
type Config struct {
|
||||
Global GlobalConfig `yaml:"global"`
|
||||
@@ -48,15 +55,18 @@ type ScrapeConfig struct {
|
||||
HonorTimestamps bool `yaml:"honor_timestamps"`
|
||||
Scheme string `yaml:"scheme"`
|
||||
Params map[string][]string `yaml:"params"`
|
||||
BasicAuth *BasicAuthConfig `yaml:"basic_auth"`
|
||||
BasicAuth *promauth.BasicAuthConfig `yaml:"basic_auth"`
|
||||
BearerToken string `yaml:"bearer_token"`
|
||||
BearerTokenFile string `yaml:"bearer_token_file"`
|
||||
TLSConfig *TLSConfig `yaml:"tls_config"`
|
||||
TLSConfig *promauth.TLSConfig `yaml:"tls_config"`
|
||||
StaticConfigs []StaticConfig `yaml:"static_configs"`
|
||||
FileSDConfigs []FileSDConfig `yaml:"file_sd_configs"`
|
||||
KubernetesSDConfigs []kubernetes.SDConfig `yaml:"kubernetes_sd_configs"`
|
||||
EC2SDConfigs []ec2.SDConfig `yaml:"ec2_sd_configs"`
|
||||
GCESDConfigs []gce.SDConfig `yaml:"gce_sd_configs"`
|
||||
RelabelConfigs []promrelabel.RelabelConfig `yaml:"relabel_configs"`
|
||||
MetricRelabelConfigs []promrelabel.RelabelConfig `yaml:"metric_relabel_configs"`
|
||||
ScrapeLimit int `yaml:"scrape_limit"`
|
||||
SampleLimit int `yaml:"sample_limit"`
|
||||
|
||||
// This is set in loadConfig
|
||||
swc *scrapeWorkConfig
|
||||
@@ -70,24 +80,6 @@ type FileSDConfig struct {
|
||||
// `refresh_interval` is ignored. See `-prometheus.fileSDCheckInterval`
|
||||
}
|
||||
|
||||
// TLSConfig represents TLS config.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config
|
||||
type TLSConfig struct {
|
||||
CAFile string `yaml:"ca_file"`
|
||||
CertFile string `yaml:"cert_file"`
|
||||
KeyFile string `yaml:"key_file"`
|
||||
ServerName string `yaml:"server_name"`
|
||||
InsecureSkipVerify bool `yaml:"insecure_skip_verify"`
|
||||
}
|
||||
|
||||
// BasicAuthConfig represents basic auth config.
|
||||
type BasicAuthConfig struct {
|
||||
Username string `yaml:"username"`
|
||||
Password string `yaml:"password"`
|
||||
PasswordFile string `yaml:"password_file"`
|
||||
}
|
||||
|
||||
// StaticConfig represents essential parts for `static_config` section of Prometheus config.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config
|
||||
@@ -109,20 +101,20 @@ func loadStaticConfigs(path string) ([]StaticConfig, error) {
|
||||
}
|
||||
|
||||
// loadConfig loads Prometheus config from the given path.
|
||||
func loadConfig(path string) (cfg *Config, err error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
func loadConfig(path string) (cfg *Config, data []byte, err error) {
|
||||
data, err = ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read Prometheus config from %q: %s", path, err)
|
||||
return nil, nil, fmt.Errorf("cannot read Prometheus config from %q: %s", path, err)
|
||||
}
|
||||
var cfgObj Config
|
||||
if err := cfgObj.parse(data, path); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse Prometheus config from %q: %s", path, err)
|
||||
return nil, nil, fmt.Errorf("cannot parse Prometheus config from %q: %s", path, err)
|
||||
}
|
||||
return &cfgObj, nil
|
||||
return &cfgObj, data, nil
|
||||
}
|
||||
|
||||
func (cfg *Config) parse(data []byte, path string) error {
|
||||
if err := yaml.Unmarshal(data, cfg); err != nil {
|
||||
if err := unmarshalMaybeStrict(data, cfg); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal data: %s", err)
|
||||
}
|
||||
absPath, err := filepath.Abs(path)
|
||||
@@ -141,6 +133,40 @@ func (cfg *Config) parse(data []byte, path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalMaybeStrict(data []byte, dst interface{}) error {
|
||||
var err error
|
||||
if *strictParse {
|
||||
err = yaml.UnmarshalStrict(data, dst)
|
||||
} else {
|
||||
err = yaml.Unmarshal(data, dst)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (cfg *Config) kubernetesSDConfigsCount() int {
|
||||
n := 0
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
n += len(cfg.ScrapeConfigs[i].KubernetesSDConfigs)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (cfg *Config) ec2SDConfigsCount() int {
|
||||
n := 0
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
n += len(cfg.ScrapeConfigs[i].EC2SDConfigs)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (cfg *Config) gceSDConfigsCount() int {
|
||||
n := 0
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
n += len(cfg.ScrapeConfigs[i].GCESDConfigs)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (cfg *Config) fileSDConfigsCount() int {
|
||||
n := 0
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
@@ -149,67 +175,80 @@ func (cfg *Config) fileSDConfigsCount() int {
|
||||
return n
|
||||
}
|
||||
|
||||
// getKubernetesSDScrapeWork returns `kubernetes_sd_configs` ScrapeWork from cfg.
|
||||
func (cfg *Config) getKubernetesSDScrapeWork() []ScrapeWork {
|
||||
var dst []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
sc := &cfg.ScrapeConfigs[i]
|
||||
for j := range sc.KubernetesSDConfigs {
|
||||
sdc := &sc.KubernetesSDConfigs[j]
|
||||
dst = appendKubernetesScrapeWork(dst, sdc, cfg.baseDir, sc.swc)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// getEC2SDScrapeWork returns `ec2_sd_configs` ScrapeWork from cfg.
|
||||
func (cfg *Config) getEC2SDScrapeWork() []ScrapeWork {
|
||||
var dst []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
sc := &cfg.ScrapeConfigs[i]
|
||||
for j := range sc.EC2SDConfigs {
|
||||
sdc := &sc.EC2SDConfigs[j]
|
||||
dst = appendEC2ScrapeWork(dst, sdc, sc.swc)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// getGCESDScrapeWork returns `gce_sd_configs` ScrapeWork from cfg.
|
||||
func (cfg *Config) getGCESDScrapeWork() []ScrapeWork {
|
||||
var dst []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
sc := &cfg.ScrapeConfigs[i]
|
||||
for j := range sc.GCESDConfigs {
|
||||
sdc := &sc.GCESDConfigs[j]
|
||||
dst = appendGCEScrapeWork(dst, sdc, sc.swc)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
|
||||
func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) ([]ScrapeWork, error) {
|
||||
var sws []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
var err error
|
||||
sws, err = cfg.ScrapeConfigs[i].appendFileSDScrapeWork(sws, prev, cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when parsing `scrape_config` #%d: %s", i+1, err)
|
||||
}
|
||||
}
|
||||
return sws, nil
|
||||
}
|
||||
|
||||
// getStaticScrapeWork returns `static_configs` ScrapeWork from from cfg.
|
||||
func (cfg *Config) getStaticScrapeWork() ([]ScrapeWork, error) {
|
||||
var sws []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
var err error
|
||||
sws, err = cfg.ScrapeConfigs[i].appendStaticScrapeWork(sws)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when parsing `scrape_config` #%d: %s", i+1, err)
|
||||
}
|
||||
}
|
||||
return sws, nil
|
||||
}
|
||||
|
||||
func (sc *ScrapeConfig) appendFileSDScrapeWork(dst, prev []ScrapeWork, baseDir string) ([]ScrapeWork, error) {
|
||||
if len(sc.FileSDConfigs) == 0 {
|
||||
// Fast path - no `file_sd_configs`
|
||||
return dst, nil
|
||||
}
|
||||
func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) []ScrapeWork {
|
||||
// Create a map for the previous scrape work.
|
||||
swPrev := make(map[string][]ScrapeWork)
|
||||
for i := range prev {
|
||||
sw := &prev[i]
|
||||
label := promrelabel.GetLabelByName(sw.Labels, "__meta_filepath")
|
||||
if label == nil {
|
||||
logger.Panicf("BUG: missing `__meta_filepath` label")
|
||||
filepath := promrelabel.GetLabelValueByName(sw.Labels, "__vm_filepath")
|
||||
if len(filepath) == 0 {
|
||||
logger.Panicf("BUG: missing `__vm_filepath` label")
|
||||
} else {
|
||||
swPrev[label.Value] = append(swPrev[label.Value], *sw)
|
||||
swPrev[filepath] = append(swPrev[filepath], *sw)
|
||||
}
|
||||
}
|
||||
for i := range sc.FileSDConfigs {
|
||||
var err error
|
||||
dst, err = sc.FileSDConfigs[i].appendScrapeWork(dst, swPrev, baseDir, sc.swc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when parsing `file_sd_config` #%d: %s", i+1, err)
|
||||
var dst []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
sc := &cfg.ScrapeConfigs[i]
|
||||
for j := range sc.FileSDConfigs {
|
||||
sdc := &sc.FileSDConfigs[j]
|
||||
dst = sdc.appendScrapeWork(dst, swPrev, cfg.baseDir, sc.swc)
|
||||
}
|
||||
}
|
||||
return dst, nil
|
||||
return dst
|
||||
}
|
||||
|
||||
func (sc *ScrapeConfig) appendStaticScrapeWork(dst []ScrapeWork) ([]ScrapeWork, error) {
|
||||
for i := range sc.StaticConfigs {
|
||||
var err error
|
||||
dst, err = sc.StaticConfigs[i].appendScrapeWork(dst, sc.swc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when parsing `static_config` #%d: %s", i+1, err)
|
||||
// getStaticScrapeWork returns `static_configs` ScrapeWork from cfg.
|
||||
func (cfg *Config) getStaticScrapeWork() []ScrapeWork {
|
||||
var dst []ScrapeWork
|
||||
for i := range cfg.ScrapeConfigs {
|
||||
sc := &cfg.ScrapeConfigs[i]
|
||||
for j := range sc.StaticConfigs {
|
||||
stc := &sc.StaticConfigs[j]
|
||||
dst = stc.appendScrapeWork(dst, sc.swc, nil)
|
||||
}
|
||||
}
|
||||
return dst, nil
|
||||
return dst
|
||||
}
|
||||
|
||||
func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConfig) (*scrapeWorkConfig, error) {
|
||||
@@ -245,79 +284,10 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
return nil, fmt.Errorf("unexpected `scheme` for `job_name` %q: %q; supported values: http or https", jobName, scheme)
|
||||
}
|
||||
params := sc.Params
|
||||
var authorization string
|
||||
if sc.BasicAuth != nil {
|
||||
if sc.BasicAuth.Username == "" {
|
||||
return nil, fmt.Errorf("missing `username` in `basic_auth` section for `job_name` %q", jobName)
|
||||
}
|
||||
username := sc.BasicAuth.Username
|
||||
password := sc.BasicAuth.Password
|
||||
if sc.BasicAuth.PasswordFile != "" {
|
||||
if sc.BasicAuth.Password != "" {
|
||||
return nil, fmt.Errorf("both `password`=%q and `password_file`=%q are set in `basic_auth` section for `job_name` %q",
|
||||
sc.BasicAuth.Password, sc.BasicAuth.PasswordFile, jobName)
|
||||
}
|
||||
path := getFilepath(baseDir, sc.BasicAuth.PasswordFile)
|
||||
pass, err := readPasswordFromFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read password from `password_file`=%q set in `basic_auth` section for `job_name` %q: %s",
|
||||
sc.BasicAuth.PasswordFile, jobName, err)
|
||||
}
|
||||
password = pass
|
||||
}
|
||||
// See https://en.wikipedia.org/wiki/Basic_access_authentication
|
||||
token := username + ":" + password
|
||||
token64 := base64.StdEncoding.EncodeToString([]byte(token))
|
||||
authorization = "Basic " + token64
|
||||
ac, err := promauth.NewConfig(baseDir, sc.BasicAuth, sc.BearerToken, sc.BearerTokenFile, sc.TLSConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse auth config for `job_name` %q: %s", jobName, err)
|
||||
}
|
||||
bearerToken := sc.BearerToken
|
||||
if sc.BearerTokenFile != "" {
|
||||
if sc.BearerToken != "" {
|
||||
return nil, fmt.Errorf("both `bearer_token`=%q and `bearer_token_file`=%q are set for `job_name` %q", sc.BearerToken, sc.BearerTokenFile, jobName)
|
||||
}
|
||||
path := getFilepath(baseDir, sc.BearerTokenFile)
|
||||
token, err := readPasswordFromFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read bearer token from `bearer_token_file`=%q for `job_name` %q: %s", sc.BearerTokenFile, jobName, err)
|
||||
}
|
||||
bearerToken = token
|
||||
}
|
||||
if bearerToken != "" {
|
||||
if authorization != "" {
|
||||
return nil, fmt.Errorf("cannot use both `basic_auth` and `bearer_token` for `job_name` %q", jobName)
|
||||
}
|
||||
authorization = "Bearer " + bearerToken
|
||||
}
|
||||
var tlsRootCA *x509.CertPool
|
||||
var tlsCertificate *tls.Certificate
|
||||
tlsServerName := ""
|
||||
tlsInsecureSkipVerify := false
|
||||
if sc.TLSConfig != nil {
|
||||
tlsServerName = sc.TLSConfig.ServerName
|
||||
tlsInsecureSkipVerify = sc.TLSConfig.InsecureSkipVerify
|
||||
if sc.TLSConfig.CertFile != "" || sc.TLSConfig.KeyFile != "" {
|
||||
certPath := getFilepath(baseDir, sc.TLSConfig.CertFile)
|
||||
keyPath := getFilepath(baseDir, sc.TLSConfig.KeyFile)
|
||||
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load TLS certificate for `job_name` %q from `cert_file`=%q, `key_file`=%q: %s",
|
||||
jobName, sc.TLSConfig.CertFile, sc.TLSConfig.KeyFile, err)
|
||||
}
|
||||
tlsCertificate = &cert
|
||||
}
|
||||
if sc.TLSConfig.CAFile != "" {
|
||||
path := getFilepath(baseDir, sc.TLSConfig.CAFile)
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read `ca_file` %q for `job_name` %q: %s", sc.TLSConfig.CAFile, jobName, err)
|
||||
}
|
||||
tlsRootCA = x509.NewCertPool()
|
||||
if !tlsRootCA.AppendCertsFromPEM(data) {
|
||||
return nil, fmt.Errorf("cannot parse data from `ca_file` %q for `job_name` %q", sc.TLSConfig.CAFile, jobName)
|
||||
}
|
||||
}
|
||||
}
|
||||
var err error
|
||||
var relabelConfigs []promrelabel.ParsedRelabelConfig
|
||||
relabelConfigs, err = promrelabel.ParseRelabelConfigs(relabelConfigs[:0], sc.RelabelConfigs)
|
||||
if err != nil {
|
||||
@@ -328,51 +298,86 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `metric_relabel_configs` for `job_name` %q: %s", jobName, err)
|
||||
}
|
||||
scrapeLimit := sc.ScrapeLimit
|
||||
swc := &scrapeWorkConfig{
|
||||
scrapeInterval: scrapeInterval,
|
||||
scrapeTimeout: scrapeTimeout,
|
||||
jobName: jobName,
|
||||
metricsPath: metricsPath,
|
||||
scheme: scheme,
|
||||
params: params,
|
||||
authorization: authorization,
|
||||
honorLabels: honorLabels,
|
||||
honorTimestamps: honorTimestamps,
|
||||
externalLabels: globalCfg.ExternalLabels,
|
||||
tlsRootCA: tlsRootCA,
|
||||
tlsCertificate: tlsCertificate,
|
||||
tlsServerName: tlsServerName,
|
||||
tlsInsecureSkipVerify: tlsInsecureSkipVerify,
|
||||
relabelConfigs: relabelConfigs,
|
||||
metricRelabelConfigs: metricRelabelConfigs,
|
||||
scrapeLimit: scrapeLimit,
|
||||
scrapeInterval: scrapeInterval,
|
||||
scrapeTimeout: scrapeTimeout,
|
||||
jobName: jobName,
|
||||
metricsPath: metricsPath,
|
||||
scheme: scheme,
|
||||
params: params,
|
||||
authConfig: ac,
|
||||
honorLabels: honorLabels,
|
||||
honorTimestamps: honorTimestamps,
|
||||
externalLabels: globalCfg.ExternalLabels,
|
||||
relabelConfigs: relabelConfigs,
|
||||
metricRelabelConfigs: metricRelabelConfigs,
|
||||
sampleLimit: sc.SampleLimit,
|
||||
}
|
||||
return swc, nil
|
||||
}
|
||||
|
||||
type scrapeWorkConfig struct {
|
||||
scrapeInterval time.Duration
|
||||
scrapeTimeout time.Duration
|
||||
jobName string
|
||||
metricsPath string
|
||||
scheme string
|
||||
params map[string][]string
|
||||
authorization string
|
||||
honorLabels bool
|
||||
honorTimestamps bool
|
||||
externalLabels map[string]string
|
||||
tlsRootCA *x509.CertPool
|
||||
tlsCertificate *tls.Certificate
|
||||
tlsServerName string
|
||||
tlsInsecureSkipVerify bool
|
||||
relabelConfigs []promrelabel.ParsedRelabelConfig
|
||||
metricRelabelConfigs []promrelabel.ParsedRelabelConfig
|
||||
scrapeLimit int
|
||||
metaLabels map[string]string
|
||||
scrapeInterval time.Duration
|
||||
scrapeTimeout time.Duration
|
||||
jobName string
|
||||
metricsPath string
|
||||
scheme string
|
||||
params map[string][]string
|
||||
authConfig *promauth.Config
|
||||
honorLabels bool
|
||||
honorTimestamps bool
|
||||
externalLabels map[string]string
|
||||
relabelConfigs []promrelabel.ParsedRelabelConfig
|
||||
metricRelabelConfigs []promrelabel.ParsedRelabelConfig
|
||||
sampleLimit int
|
||||
}
|
||||
|
||||
func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) ([]ScrapeWork, error) {
|
||||
func appendKubernetesScrapeWork(dst []ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
|
||||
ac, err := promauth.NewConfig(baseDir, sdc.BasicAuth, sdc.BearerToken, sdc.BearerTokenFile, sdc.TLSConfig)
|
||||
if err != nil {
|
||||
logger.Errorf("cannot parse auth config for `kubernetes_sd_config` for `job_name` %q: %s; skipping it", swc.jobName, err)
|
||||
return dst
|
||||
}
|
||||
targetLabels, err := kubernetes.GetLabels(ac, sdc)
|
||||
if err != nil {
|
||||
logger.Errorf("error when discovering kubernetes nodes for `job_name` %q: %s; skipping it", swc.jobName, err)
|
||||
return dst
|
||||
}
|
||||
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "kubernetes_sd_config")
|
||||
}
|
||||
|
||||
func appendEC2ScrapeWork(dst []ScrapeWork, sdc *ec2.SDConfig, swc *scrapeWorkConfig) []ScrapeWork {
|
||||
targetLabels, err := ec2.GetLabels(sdc)
|
||||
if err != nil {
|
||||
logger.Errorf("error when discovering ec2 nodes for `job_name` %q: %s; skipping it", swc.jobName, err)
|
||||
return dst
|
||||
}
|
||||
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "ec2_sd_config")
|
||||
}
|
||||
|
||||
func appendGCEScrapeWork(dst []ScrapeWork, sdc *gce.SDConfig, swc *scrapeWorkConfig) []ScrapeWork {
|
||||
targetLabels, err := gce.GetLabels(sdc)
|
||||
if err != nil {
|
||||
logger.Errorf("error when discovering gce nodes for `job_name` %q: %s; skippint it", swc.jobName, err)
|
||||
return dst
|
||||
}
|
||||
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "gce_sd_config")
|
||||
}
|
||||
|
||||
func appendScrapeWorkForTargetLabels(dst []ScrapeWork, swc *scrapeWorkConfig, targetLabels []map[string]string, sectionName string) []ScrapeWork {
|
||||
for _, metaLabels := range targetLabels {
|
||||
target := metaLabels["__address__"]
|
||||
var err error
|
||||
dst, err = appendScrapeWork(dst, swc, target, nil, metaLabels)
|
||||
if err != nil {
|
||||
logger.Errorf("error when parsing `%s` target %q for `job_name` %q: %s; skipping it", sectionName, target, swc.jobName, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
|
||||
for _, file := range sdc.Files {
|
||||
pathPattern := getFilepath(baseDir, file)
|
||||
paths := []string{pathPattern}
|
||||
@@ -380,7 +385,9 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
|
||||
var err error
|
||||
paths, err = filepath.Glob(pathPattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid pattern %q in `files` section: %s", file, err)
|
||||
// Do not return this error, since other files may contain valid scrape configs.
|
||||
logger.Errorf("invalid pattern %q in `files` section: %s; skipping it", file, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
for _, path := range paths {
|
||||
@@ -396,7 +403,6 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
|
||||
}
|
||||
continue
|
||||
}
|
||||
swcCopy := *swc
|
||||
pathShort := path
|
||||
if strings.HasPrefix(pathShort, baseDir) {
|
||||
pathShort = path[len(baseDir):]
|
||||
@@ -404,90 +410,91 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
|
||||
pathShort = pathShort[1:]
|
||||
}
|
||||
}
|
||||
swcCopy.metaLabels = map[string]string{
|
||||
metaLabels := map[string]string{
|
||||
"__meta_filepath": pathShort,
|
||||
"__vm_filepath": pathShort, // This label is needed for internal promscrape logic
|
||||
}
|
||||
for i := range stcs {
|
||||
dst, err = stcs[i].appendScrapeWork(dst, &swcCopy)
|
||||
if err != nil {
|
||||
// Do not return this error, since other paths may contain valid scrape configs.
|
||||
logger.Errorf("error when parsing `static_config` #%d from %q: %s", i+1, path, err)
|
||||
continue
|
||||
}
|
||||
dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)
|
||||
}
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func (stc *StaticConfig) appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, metaLabels map[string]string) []ScrapeWork {
|
||||
for _, target := range stc.Targets {
|
||||
if target == "" {
|
||||
// Do not return this error, since other targets may be valid
|
||||
logger.Errorf("`static_configs` target for `job_name` %q cannot be empty; skipping it", swc.jobName)
|
||||
continue
|
||||
}
|
||||
var err error
|
||||
dst, err = appendScrapeWork(dst, swc, target, stc.Labels, metaLabels)
|
||||
if err != nil {
|
||||
// Do not return this error, since other targets may be valid
|
||||
logger.Errorf("error when parsing `static_configs` target %q for `job_name` %q: %s; skipping it", target, swc.jobName, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, target string, extraLabels, metaLabels map[string]string) ([]ScrapeWork, error) {
|
||||
labels := mergeLabels(swc.jobName, swc.scheme, target, swc.metricsPath, extraLabels, swc.externalLabels, metaLabels, swc.params)
|
||||
labels = promrelabel.ApplyRelabelConfigs(labels, 0, swc.relabelConfigs, false)
|
||||
labels = promrelabel.RemoveMetaLabels(labels[:0], labels)
|
||||
if len(labels) == 0 {
|
||||
// Drop target without labels.
|
||||
return dst, nil
|
||||
}
|
||||
// See https://www.robustperception.io/life-of-a-label
|
||||
schemeRelabeled := promrelabel.GetLabelValueByName(labels, "__scheme__")
|
||||
if len(schemeRelabeled) == 0 {
|
||||
schemeRelabeled = "http"
|
||||
}
|
||||
addressRelabeled := promrelabel.GetLabelValueByName(labels, "__address__")
|
||||
if len(addressRelabeled) == 0 {
|
||||
// Drop target without scrape address.
|
||||
return dst, nil
|
||||
}
|
||||
targetRelabeled := addMissingPort(schemeRelabeled, addressRelabeled)
|
||||
if strings.Contains(targetRelabeled, "/") {
|
||||
// Drop target with '/'
|
||||
return dst, nil
|
||||
}
|
||||
metricsPathRelabeled := promrelabel.GetLabelValueByName(labels, "__metrics_path__")
|
||||
if metricsPathRelabeled == "" {
|
||||
metricsPathRelabeled = "/metrics"
|
||||
}
|
||||
paramsRelabeled := getParamsFromLabels(labels, swc.params)
|
||||
optionalQuestion := "?"
|
||||
if len(paramsRelabeled) == 0 || strings.Contains(metricsPathRelabeled, "?") {
|
||||
optionalQuestion = ""
|
||||
}
|
||||
paramsStr := url.Values(paramsRelabeled).Encode()
|
||||
scrapeURL := fmt.Sprintf("%s://%s%s%s%s", schemeRelabeled, targetRelabeled, metricsPathRelabeled, optionalQuestion, paramsStr)
|
||||
if _, err := url.Parse(scrapeURL); err != nil {
|
||||
return dst, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q (%q), metrics_path=%q (%q) for `job_name` %q: %s",
|
||||
scrapeURL, swc.scheme, schemeRelabeled, target, targetRelabeled, swc.metricsPath, metricsPathRelabeled, swc.jobName, err)
|
||||
}
|
||||
dst = append(dst, ScrapeWork{
|
||||
ID: atomic.AddUint64(&nextScrapeWorkID, 1),
|
||||
ScrapeURL: scrapeURL,
|
||||
ScrapeInterval: swc.scrapeInterval,
|
||||
ScrapeTimeout: swc.scrapeTimeout,
|
||||
HonorLabels: swc.honorLabels,
|
||||
HonorTimestamps: swc.honorTimestamps,
|
||||
Labels: labels,
|
||||
AuthConfig: swc.authConfig,
|
||||
MetricRelabelConfigs: swc.metricRelabelConfigs,
|
||||
SampleLimit: swc.sampleLimit,
|
||||
})
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func (stc *StaticConfig) appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig) ([]ScrapeWork, error) {
|
||||
for _, target := range stc.Targets {
|
||||
if target == "" {
|
||||
return nil, fmt.Errorf("`static_configs` target for `job_name` %q cannot be empty", swc.jobName)
|
||||
}
|
||||
labels, err := mergeLabels(swc.jobName, swc.scheme, target, swc.metricsPath, stc.Labels, swc.externalLabels, swc.metaLabels, swc.params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot merge labels for `static_configs` target for `job_name` %q: %s", swc.jobName, err)
|
||||
}
|
||||
labels = promrelabel.ApplyRelabelConfigs(labels, 0, swc.relabelConfigs, false)
|
||||
if len(labels) == 0 {
|
||||
// Drop target without labels.
|
||||
continue
|
||||
}
|
||||
// See https://www.robustperception.io/life-of-a-label
|
||||
schemeRelabeled := ""
|
||||
if schemeLabel := promrelabel.GetLabelByName(labels, "__scheme__"); schemeLabel != nil {
|
||||
schemeRelabeled = schemeLabel.Value
|
||||
}
|
||||
if schemeRelabeled == "" {
|
||||
schemeRelabeled = "http"
|
||||
}
|
||||
addressLabel := promrelabel.GetLabelByName(labels, "__address__")
|
||||
if addressLabel == nil || addressLabel.Name == "" {
|
||||
// Drop target without scrape address.
|
||||
continue
|
||||
}
|
||||
targetRelabeled := addMissingPort(schemeRelabeled, addressLabel.Value)
|
||||
if strings.Contains(targetRelabeled, "/") {
|
||||
// Drop target with '/'
|
||||
continue
|
||||
}
|
||||
metricsPathRelabeled := ""
|
||||
if metricsPathLabel := promrelabel.GetLabelByName(labels, "__metrics_path__"); metricsPathLabel != nil {
|
||||
metricsPathRelabeled = metricsPathLabel.Value
|
||||
}
|
||||
if metricsPathRelabeled == "" {
|
||||
metricsPathRelabeled = "/metrics"
|
||||
}
|
||||
paramsRelabeled := getParamsFromLabels(labels, swc.params)
|
||||
optionalQuestion := "?"
|
||||
if len(paramsRelabeled) == 0 || strings.Contains(metricsPathRelabeled, "?") {
|
||||
optionalQuestion = ""
|
||||
}
|
||||
paramsStr := url.Values(paramsRelabeled).Encode()
|
||||
scrapeURL := fmt.Sprintf("%s://%s%s%s%s", schemeRelabeled, targetRelabeled, metricsPathRelabeled, optionalQuestion, paramsStr)
|
||||
if _, err := url.Parse(scrapeURL); err != nil {
|
||||
return nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q (%q), metrics_path=%q (%q) for `job_name` %q: %s",
|
||||
scrapeURL, swc.scheme, schemeRelabeled, target, targetRelabeled, swc.metricsPath, metricsPathRelabeled, swc.jobName, err)
|
||||
}
|
||||
dst = append(dst, ScrapeWork{
|
||||
ScrapeURL: scrapeURL,
|
||||
ScrapeInterval: swc.scrapeInterval,
|
||||
ScrapeTimeout: swc.scrapeTimeout,
|
||||
HonorLabels: swc.honorLabels,
|
||||
HonorTimestamps: swc.honorTimestamps,
|
||||
Labels: labels,
|
||||
Authorization: swc.authorization,
|
||||
TLSRootCA: swc.tlsRootCA,
|
||||
TLSCertificate: swc.tlsCertificate,
|
||||
TLSServerName: swc.tlsServerName,
|
||||
TLSInsecureSkipVerify: swc.tlsInsecureSkipVerify,
|
||||
MetricRelabelConfigs: swc.metricRelabelConfigs,
|
||||
ScrapeLimit: swc.scrapeLimit,
|
||||
})
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
// Each ScrapeWork has an ID, which is used for locating it when updating its status.
|
||||
var nextScrapeWorkID uint64
|
||||
|
||||
func getParamsFromLabels(labels []prompbmarshal.Label, paramsOrig map[string][]string) map[string][]string {
|
||||
// See https://www.robustperception.io/life-of-a-label
|
||||
@@ -507,7 +514,7 @@ func getParamsFromLabels(labels []prompbmarshal.Label, paramsOrig map[string][]s
|
||||
return m
|
||||
}
|
||||
|
||||
func mergeLabels(job, scheme, target, metricsPath string, labels, externalLabels, metaLabels map[string]string, params map[string][]string) ([]prompbmarshal.Label, error) {
|
||||
func mergeLabels(job, scheme, target, metricsPath string, extraLabels, externalLabels, metaLabels map[string]string, params map[string][]string) []prompbmarshal.Label {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
||||
m := make(map[string]string)
|
||||
for k, v := range externalLabels {
|
||||
@@ -525,7 +532,7 @@ func mergeLabels(job, scheme, target, metricsPath string, labels, externalLabels
|
||||
v := args[0]
|
||||
m[k] = v
|
||||
}
|
||||
for k, v := range labels {
|
||||
for k, v := range extraLabels {
|
||||
m[k] = v
|
||||
}
|
||||
for k, v := range metaLabels {
|
||||
@@ -538,7 +545,7 @@ func mergeLabels(job, scheme, target, metricsPath string, labels, externalLabels
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
return result
|
||||
}
|
||||
|
||||
func getFilepath(baseDir, path string) string {
|
||||
@@ -548,15 +555,6 @@ func getFilepath(baseDir, path string) string {
|
||||
return filepath.Join(baseDir, path)
|
||||
}
|
||||
|
||||
// readPasswordFromFile returns the contents of the file at path
// with all trailing whitespace removed.
//
// Leading whitespace is preserved. The read error is returned as is.
func readPasswordFromFile(path string) (string, error) {
	raw, readErr := ioutil.ReadFile(path)
	if readErr != nil {
		return "", readErr
	}
	return strings.TrimRightFunc(string(raw), unicode.IsSpace), nil
}
|
||||
|
||||
func addMissingPort(scheme, target string) string {
|
||||
if strings.Contains(target, ":") {
|
||||
return target
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
@@ -41,7 +42,7 @@ func TestLoadStaticConfigs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestLoadConfig(t *testing.T) {
|
||||
cfg, err := loadConfig("testdata/prometheus.yml")
|
||||
cfg, _, err := loadConfig("testdata/prometheus.yml")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
@@ -50,7 +51,7 @@ func TestLoadConfig(t *testing.T) {
|
||||
}
|
||||
|
||||
// Try loading non-existing file
|
||||
cfg, err = loadConfig("testdata/non-existing-file")
|
||||
cfg, _, err = loadConfig("testdata/non-existing-file")
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
@@ -59,7 +60,7 @@ func TestLoadConfig(t *testing.T) {
|
||||
}
|
||||
|
||||
// Try loading invalid file
|
||||
cfg, err = loadConfig("testdata/file_sd_1.yml")
|
||||
cfg, _, err = loadConfig("testdata/file_sd_1.yml")
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
@@ -79,10 +80,7 @@ scrape_configs:
|
||||
if err := cfg.parse([]byte(data), "sss"); err != nil {
|
||||
t.Fatalf("cannot parase data: %s", err)
|
||||
}
|
||||
sws, err := cfg.getFileSDScrapeWork(nil)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot obtain `file_sd_config`: %s", err)
|
||||
}
|
||||
sws := cfg.getFileSDScrapeWork(nil)
|
||||
if !equalStaticConfigForScrapeWorks(sws, sws) {
|
||||
t.Fatalf("unexpected non-equal static configs;\nsws:\n%#v", sws)
|
||||
}
|
||||
@@ -98,10 +96,7 @@ scrape_configs:
|
||||
if err := cfgNew.parse([]byte(dataNew), "sss"); err != nil {
|
||||
t.Fatalf("cannot parse data: %s", err)
|
||||
}
|
||||
swsNew, err := cfgNew.getFileSDScrapeWork(sws)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot obtain `file_sd_config`: %s", err)
|
||||
}
|
||||
swsNew := cfgNew.getFileSDScrapeWork(sws)
|
||||
if equalStaticConfigForScrapeWorks(swsNew, sws) {
|
||||
t.Fatalf("unexpected equal static configs;\nswsNew:\n%#v\nsws:\n%#v", swsNew, sws)
|
||||
}
|
||||
@@ -116,10 +111,7 @@ scrape_configs:
|
||||
if err := cfg.parse([]byte(data), "sss"); err != nil {
|
||||
t.Fatalf("cannot parse data: %s", err)
|
||||
}
|
||||
sws, err = cfg.getFileSDScrapeWork(swsNew)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
sws = cfg.getFileSDScrapeWork(swsNew)
|
||||
if len(sws) != 0 {
|
||||
t.Fatalf("unexpected non-empty sws:\n%#v", sws)
|
||||
}
|
||||
@@ -134,10 +126,7 @@ scrape_configs:
|
||||
if err := cfg.parse([]byte(data), "sss"); err != nil {
|
||||
t.Fatalf("cannot parse data: %s", err)
|
||||
}
|
||||
sws, err = cfg.getFileSDScrapeWork(swsNew)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
sws = cfg.getFileSDScrapeWork(swsNew)
|
||||
if len(sws) != 0 {
|
||||
t.Fatalf("unexpected non-empty sws:\n%#v", sws)
|
||||
}
|
||||
@@ -148,7 +137,7 @@ func getFileSDScrapeWork(data []byte, path string) ([]ScrapeWork, error) {
|
||||
if err := cfg.parse(data, path); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse data: %s", err)
|
||||
}
|
||||
return cfg.getFileSDScrapeWork(nil)
|
||||
return cfg.getFileSDScrapeWork(nil), nil
|
||||
}
|
||||
|
||||
func getStaticScrapeWork(data []byte, path string) ([]ScrapeWork, error) {
|
||||
@@ -156,7 +145,7 @@ func getStaticScrapeWork(data []byte, path string) ([]ScrapeWork, error) {
|
||||
if err := cfg.parse(data, path); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse data: %s", err)
|
||||
}
|
||||
return cfg.getStaticScrapeWork()
|
||||
return cfg.getStaticScrapeWork(), nil
|
||||
}
|
||||
|
||||
func TestGetStaticScrapeWorkFailure(t *testing.T) {
|
||||
@@ -190,22 +179,6 @@ scrape_configs:
|
||||
- targets: ["foo"]
|
||||
`)
|
||||
|
||||
// Empty target
|
||||
f(`
|
||||
scrape_configs:
|
||||
- job_name: x
|
||||
static_configs:
|
||||
- targets: ["foo", ""]
|
||||
`)
|
||||
|
||||
// Invalid url
|
||||
f(`
|
||||
scrape_configs:
|
||||
- job_name: x
|
||||
static_configs:
|
||||
- targets: ["a b"]
|
||||
`)
|
||||
|
||||
// Missing username in `basic_auth`
|
||||
f(`
|
||||
scrape_configs:
|
||||
@@ -400,6 +373,12 @@ scrape_configs:
|
||||
`)
|
||||
}
|
||||
|
||||
func resetScrapeWorkIDs(sws []ScrapeWork) {
|
||||
for i := range sws {
|
||||
sws[i].ID = 0
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetFileSDScrapeWorkSuccess(t *testing.T) {
|
||||
f := func(data string, expectedSws []ScrapeWork) {
|
||||
t.Helper()
|
||||
@@ -407,6 +386,7 @@ func TestGetFileSDScrapeWorkSuccess(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
resetScrapeWorkIDs(sws)
|
||||
if !reflect.DeepEqual(sws, expectedSws) {
|
||||
t.Fatalf("unexpected scrapeWork; got\n%v\nwant\n%v", sws, expectedSws)
|
||||
}
|
||||
@@ -435,10 +415,6 @@ scrape_configs:
|
||||
Name: "__address__",
|
||||
Value: "host1",
|
||||
},
|
||||
{
|
||||
Name: "__meta_filepath",
|
||||
Value: "testdata/file_sd.json",
|
||||
},
|
||||
{
|
||||
Name: "__metrics_path__",
|
||||
Value: "/abc/de",
|
||||
@@ -447,6 +423,10 @@ scrape_configs:
|
||||
Name: "__scheme__",
|
||||
Value: "http",
|
||||
},
|
||||
{
|
||||
Name: "__vm_filepath",
|
||||
Value: "testdata/file_sd.json",
|
||||
},
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
@@ -456,6 +436,7 @@ scrape_configs:
|
||||
Value: "rty",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
{
|
||||
ScrapeURL: "http://host2:80/abc/de",
|
||||
@@ -468,10 +449,6 @@ scrape_configs:
|
||||
Name: "__address__",
|
||||
Value: "host2",
|
||||
},
|
||||
{
|
||||
Name: "__meta_filepath",
|
||||
Value: "testdata/file_sd.json",
|
||||
},
|
||||
{
|
||||
Name: "__metrics_path__",
|
||||
Value: "/abc/de",
|
||||
@@ -480,6 +457,10 @@ scrape_configs:
|
||||
Name: "__scheme__",
|
||||
Value: "http",
|
||||
},
|
||||
{
|
||||
Name: "__vm_filepath",
|
||||
Value: "testdata/file_sd.json",
|
||||
},
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
@@ -489,6 +470,7 @@ scrape_configs:
|
||||
Value: "rty",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
{
|
||||
ScrapeURL: "http://localhost:9090/abc/de",
|
||||
@@ -501,10 +483,6 @@ scrape_configs:
|
||||
Name: "__address__",
|
||||
Value: "localhost:9090",
|
||||
},
|
||||
{
|
||||
Name: "__meta_filepath",
|
||||
Value: "testdata/file_sd_1.yml",
|
||||
},
|
||||
{
|
||||
Name: "__metrics_path__",
|
||||
Value: "/abc/de",
|
||||
@@ -513,6 +491,10 @@ scrape_configs:
|
||||
Name: "__scheme__",
|
||||
Value: "http",
|
||||
},
|
||||
{
|
||||
Name: "__vm_filepath",
|
||||
Value: "testdata/file_sd_1.yml",
|
||||
},
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
@@ -522,6 +504,7 @@ scrape_configs:
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -533,6 +516,7 @@ func TestGetStaticScrapeWorkSuccess(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
resetScrapeWorkIDs(sws)
|
||||
if !reflect.DeepEqual(sws, expectedSws) {
|
||||
t.Fatalf("unexpected scrapeWork; got\n%v\nwant\n%v", sws, expectedSws)
|
||||
}
|
||||
@@ -568,6 +552,7 @@ scrape_configs:
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -612,6 +597,7 @@ scrape_configs:
|
||||
Value: "xxx",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -676,7 +662,9 @@ scrape_configs:
|
||||
Value: "y",
|
||||
},
|
||||
},
|
||||
Authorization: "Bearer xyz",
|
||||
AuthConfig: &promauth.Config{
|
||||
Authorization: "Bearer xyz",
|
||||
},
|
||||
},
|
||||
{
|
||||
ScrapeURL: "https://aaa:443/foo/bar?p=x%26y&p=%3D",
|
||||
@@ -710,7 +698,9 @@ scrape_configs:
|
||||
Value: "y",
|
||||
},
|
||||
},
|
||||
Authorization: "Bearer xyz",
|
||||
AuthConfig: &promauth.Config{
|
||||
Authorization: "Bearer xyz",
|
||||
},
|
||||
},
|
||||
{
|
||||
ScrapeURL: "http://1.2.3.4:80/metrics",
|
||||
@@ -736,9 +726,11 @@ scrape_configs:
|
||||
Value: "qwer",
|
||||
},
|
||||
},
|
||||
Authorization: "Basic dXNlcjpwYXNz",
|
||||
TLSServerName: "foobar",
|
||||
TLSInsecureSkipVerify: true,
|
||||
AuthConfig: &promauth.Config{
|
||||
Authorization: "Basic dXNlcjpwYXNz",
|
||||
TLSServerName: "foobar",
|
||||
TLSInsecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -807,6 +799,7 @@ scrape_configs:
|
||||
Value: "http://foo.bar:1234/metrics",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -867,6 +860,7 @@ scrape_configs:
|
||||
Value: "https",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -904,6 +898,7 @@ scrape_configs:
|
||||
Value: "3",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -937,6 +932,7 @@ scrape_configs:
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
MetricRelabelConfigs: []promrelabel.ParsedRelabelConfig{
|
||||
{
|
||||
SourceLabels: []string{"foo"},
|
||||
@@ -980,7 +976,9 @@ scrape_configs:
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
Authorization: "Basic eHl6OnNlY3JldC1wYXNz",
|
||||
AuthConfig: &promauth.Config{
|
||||
Authorization: "Basic eHl6OnNlY3JldC1wYXNz",
|
||||
},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -1012,7 +1010,9 @@ scrape_configs:
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
Authorization: "Bearer secret-pass",
|
||||
AuthConfig: &promauth.Config{
|
||||
Authorization: "Bearer secret-pass",
|
||||
},
|
||||
},
|
||||
})
|
||||
snakeoilCert, err := tls.LoadX509KeyPair("testdata/ssl-cert-snakeoil.pem", "testdata/ssl-cert-snakeoil.key")
|
||||
@@ -1050,7 +1050,9 @@ scrape_configs:
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
TLSCertificate: &snakeoilCert,
|
||||
AuthConfig: &promauth.Config{
|
||||
TLSCertificate: &snakeoilCert,
|
||||
},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -1107,6 +1109,7 @@ scrape_configs:
|
||||
Value: "qwe",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
f(`
|
||||
@@ -1160,6 +1163,7 @@ scrape_configs:
|
||||
Value: "snmp",
|
||||
},
|
||||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
218
lib/promscrape/discovery/ec2/api.go
Normal file
218
lib/promscrape/discovery/ec2/api.go
Normal file
@@ -0,0 +1,218 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// apiConfig holds resolved settings for querying the EC2 API.
type apiConfig struct {
	// endpoint is an optional custom API endpoint; region is the AWS region.
	endpoint, region string

	// accessKey and secretKey are the credentials used for signing requests.
	accessKey, secretKey string

	// filters is an already encoded query string fragment.
	filters string

	// port is the port put into the address of every discovered target.
	port int
}
|
||||
|
||||
func getAPIConfig(sdc *SDConfig) (*apiConfig, error) {
|
||||
apiConfigMapLock.Lock()
|
||||
defer apiConfigMapLock.Unlock()
|
||||
|
||||
if !hasAPIConfigMapCleaner {
|
||||
hasAPIConfigMapCleaner = true
|
||||
go apiConfigMapCleaner()
|
||||
}
|
||||
|
||||
e := apiConfigMap[sdc]
|
||||
if e != nil {
|
||||
e.lastAccessTime = time.Now()
|
||||
return e.cfg, nil
|
||||
}
|
||||
cfg, err := newAPIConfig(sdc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
apiConfigMap[sdc] = &apiConfigMapEntry{
|
||||
cfg: cfg,
|
||||
lastAccessTime: time.Now(),
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func apiConfigMapCleaner() {
|
||||
tc := time.NewTicker(15 * time.Minute)
|
||||
for currentTime := range tc.C {
|
||||
apiConfigMapLock.Lock()
|
||||
for k, e := range apiConfigMap {
|
||||
if currentTime.Sub(e.lastAccessTime) > 10*time.Minute {
|
||||
delete(apiConfigMap, k)
|
||||
}
|
||||
}
|
||||
apiConfigMapLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
type apiConfigMapEntry struct {
|
||||
cfg *apiConfig
|
||||
lastAccessTime time.Time
|
||||
}
|
||||
|
||||
var (
|
||||
apiConfigMap = make(map[*SDConfig]*apiConfigMapEntry)
|
||||
apiConfigMapLock sync.Mutex
|
||||
hasAPIConfigMapCleaner bool
|
||||
)
|
||||
|
||||
func newAPIConfig(sdc *SDConfig) (*apiConfig, error) {
|
||||
region := sdc.Region
|
||||
if len(region) == 0 {
|
||||
r, err := getDefaultRegion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot determine default ec2 region; probably, `region` param in `ec2_sd_configs` is missing; the error: %s", err)
|
||||
}
|
||||
region = r
|
||||
}
|
||||
accessKey := sdc.AccessKey
|
||||
if len(accessKey) == 0 {
|
||||
accessKey = os.Getenv("AWS_ACCESS_KEY_ID")
|
||||
if len(accessKey) == 0 {
|
||||
return nil, fmt.Errorf("missing `access_key` in AWS_ACCESS_KEY_ID env var; probably, `access_key` must be set in `ec2_sd_config`?")
|
||||
}
|
||||
}
|
||||
secretKey := sdc.SecretKey
|
||||
if len(secretKey) == 0 {
|
||||
secretKey = os.Getenv("AWS_SECRET_ACCESS_KEY")
|
||||
if len(secretKey) == 0 {
|
||||
return nil, fmt.Errorf("miising `secret_key` in AWS_SECRET_ACCESS_KEY env var; probably, `secret_key` must be set in `ec2_sd_config`?")
|
||||
}
|
||||
}
|
||||
filters := getFiltersQueryString(sdc.Filters)
|
||||
port := 80
|
||||
if sdc.Port != nil {
|
||||
port = *sdc.Port
|
||||
}
|
||||
return &apiConfig{
|
||||
endpoint: sdc.Endpoint,
|
||||
region: region,
|
||||
accessKey: accessKey,
|
||||
secretKey: secretKey,
|
||||
filters: filters,
|
||||
port: port,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getFiltersQueryString(filters []Filter) string {
|
||||
// See how to build filters query string at examples at https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
|
||||
var args []string
|
||||
for i, f := range filters {
|
||||
args = append(args, fmt.Sprintf("Filter.%d.Name=%s", i+1, url.QueryEscape(f.Name)))
|
||||
for j, v := range f.Values {
|
||||
args = append(args, fmt.Sprintf("Filter.%d.Value.%d=%s", i+1, j+1, url.QueryEscape(v)))
|
||||
}
|
||||
}
|
||||
return strings.Join(args, "&")
|
||||
}
|
||||
|
||||
func getDefaultRegion() (string, error) {
|
||||
data, err := getMetadataByPath("dynamic/instance-identity/document")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var id IdentityDocument
|
||||
if err := json.Unmarshal(data, &id); err != nil {
|
||||
return "", fmt.Errorf("cannot parse identity document: %s", err)
|
||||
}
|
||||
return id.Region, nil
|
||||
}
|
||||
|
||||
// IdentityDocument is the instance identity document returned by the AWS metadata server.
//
// Only the fields needed by this package are declared.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
type IdentityDocument struct {
	// Region is the region the instance runs in.
	Region string
}
|
||||
|
||||
func getMetadataByPath(apiPath string) ([]byte, error) {
|
||||
// See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html
|
||||
|
||||
// Obtain session token
|
||||
sessionTokenURL := "http://169.254.169.254/latest/api/token"
|
||||
req, err := http.NewRequest("PUT", sessionTokenURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create request for IMDSv2 session token at url %q: %s", sessionTokenURL, err)
|
||||
}
|
||||
req.Header.Set("X-aws-ec2-metadata-token-ttl-seconds", "60")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain IMDSv2 session token from %q; probably, `region` is missing in `ec2_sd_config`; error: %s", sessionTokenURL, err)
|
||||
}
|
||||
token, err := readResponseBody(resp, sessionTokenURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read IMDSv2 session token from %q; probably, `region` is missing in `ec2_sd_config`; error: %s", sessionTokenURL, err)
|
||||
}
|
||||
|
||||
// Use session token in the request.
|
||||
apiURL := "http://169.254.169.254/latest/" + apiPath
|
||||
req, err = http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create request to %q: %s", apiURL, err)
|
||||
}
|
||||
req.Header.Set("X-aws-ec2-metadata-token", string(token))
|
||||
resp, err = http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain response for %q: %s", apiURL, err)
|
||||
}
|
||||
return readResponseBody(resp, apiURL)
|
||||
}
|
||||
|
||||
func getAPIResponse(cfg *apiConfig, action, nextPageToken string) ([]byte, error) {
|
||||
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/Query-Requests.html
|
||||
endpoint := fmt.Sprintf("https://ec2.%s.amazonaws.com/", cfg.region)
|
||||
if len(cfg.endpoint) > 0 {
|
||||
endpoint = cfg.endpoint
|
||||
// endpoint may contain only hostname. Convert it to proper url then.
|
||||
if !strings.Contains(endpoint, "://") {
|
||||
endpoint = "https://" + endpoint
|
||||
}
|
||||
if !strings.HasSuffix(endpoint, "/") {
|
||||
endpoint += "/"
|
||||
}
|
||||
}
|
||||
apiURL := fmt.Sprintf("%s?Action=%s", endpoint, url.QueryEscape(action))
|
||||
if len(cfg.filters) > 0 {
|
||||
apiURL += "&" + cfg.filters
|
||||
}
|
||||
if len(nextPageToken) > 0 {
|
||||
apiURL += fmt.Sprintf("&NextToken=%s", url.QueryEscape(nextPageToken))
|
||||
}
|
||||
apiURL += "&Version=2013-10-15"
|
||||
req, err := newSignedRequest(apiURL, "ec2", cfg.region, cfg.accessKey, cfg.secretKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create signed request: %s", err)
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot perform http request to %q: %s", apiURL, err)
|
||||
}
|
||||
return readResponseBody(resp, apiURL)
|
||||
}
|
||||
|
||||
// readResponseBody reads and closes resp.Body and returns the data read.
//
// An error is returned if the body cannot be read or if the status code
// isn't 200. apiURL is used only in error messages.
func readResponseBody(resp *http.Response, apiURL string) ([]byte, error) {
	body, readErr := ioutil.ReadAll(resp.Body)
	// The body is closed regardless of the read result; the close error is ignored.
	_ = resp.Body.Close()
	switch {
	case readErr != nil:
		return nil, fmt.Errorf("cannot read response from %q: %s", apiURL, readErr)
	case resp.StatusCode != http.StatusOK:
		return nil, fmt.Errorf("unexpected status code for %q; got %d; want %d; response body: %q",
			apiURL, resp.StatusCode, http.StatusOK, body)
	}
	return body, nil
}
|
||||
44
lib/promscrape/discovery/ec2/ec2.go
Normal file
44
lib/promscrape/discovery/ec2/ec2.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// SDConfig represents service discovery config for ec2.
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config
|
||||
type SDConfig struct {
|
||||
Region string `yaml:"region"`
|
||||
Endpoint string `yaml:"endpoint"`
|
||||
AccessKey string `yaml:"access_key"`
|
||||
SecretKey string `yaml:"secret_key"`
|
||||
Profile string `yaml:"profile"`
|
||||
// TODO: add support for RoleARN
|
||||
// RoleARN string `yaml:"role_arn"`
|
||||
// RefreshInterval time.Duration `yaml:"refresh_interval"`
|
||||
// refresh_interval is obtained from `-promscrape.ec2SDCheckInterval` command-line option.
|
||||
Port *int `yaml:"port"`
|
||||
Filters []Filter `yaml:"filters"`
|
||||
}
|
||||
|
||||
// Filter is a single ec2 filter: a name with the values to match.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
// and https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Filter.html
type Filter struct {
	Name   string   `yaml:"name"`
	Values []string `yaml:"values"`
}
|
||||
|
||||
// GetLabels returns ec2 labels according to sdc.
|
||||
func GetLabels(sdc *SDConfig) ([]map[string]string, error) {
|
||||
cfg, err := getAPIConfig(sdc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get API config: %s", err)
|
||||
}
|
||||
ms, err := getInstancesLabels(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when fetching instances data from EC2: %s", err)
|
||||
}
|
||||
return ms, nil
|
||||
}
|
||||
177
lib/promscrape/discovery/ec2/instance.go
Normal file
177
lib/promscrape/discovery/ec2/instance.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
|
||||
)
|
||||
|
||||
// getInstancesLabels returns labels for ec2 instances obtained from the given cfg
|
||||
func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
|
||||
rs, err := getReservations(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var ms []map[string]string
|
||||
for _, r := range rs {
|
||||
for _, inst := range r.InstanceSet.Items {
|
||||
ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port)
|
||||
}
|
||||
}
|
||||
return ms, nil
|
||||
}
|
||||
|
||||
func getReservations(cfg *apiConfig) ([]Reservation, error) {
|
||||
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
|
||||
action := "DescribeInstances"
|
||||
var rs []Reservation
|
||||
pageToken := ""
|
||||
for {
|
||||
data, err := getAPIResponse(cfg, action, pageToken)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain instances: %s", err)
|
||||
}
|
||||
ir, err := parseInstancesResponse(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse instance list: %s", err)
|
||||
}
|
||||
rs = append(rs, ir.ReservationSet.Items...)
|
||||
if len(ir.NextPageToken) == 0 {
|
||||
return rs, nil
|
||||
}
|
||||
pageToken = ir.NextPageToken
|
||||
}
|
||||
}
|
||||
|
||||
// InstancesResponse is the parsed response from DescribeInstances API call.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
type InstancesResponse struct {
	ReservationSet ReservationSet `xml:"reservationSet"`

	// NextPageToken is empty on the last page.
	NextPageToken string `xml:"nextToken"`
}

// ReservationSet represents ReservationSet from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
type ReservationSet struct {
	Items []Reservation `xml:"item"`
}

// Reservation represents Reservation from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Reservation.html
type Reservation struct {
	OwnerID     string      `xml:"ownerId"`
	InstanceSet InstanceSet `xml:"instancesSet"`
}

// InstanceSet represents InstanceSet from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Reservation.html
type InstanceSet struct {
	Items []Instance `xml:"item"`
}

// Instance represents Instance from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Instance.html
//
// Only the fields needed for building target labels are declared.
type Instance struct {
	PrivateIPAddress    string              `xml:"privateIpAddress"`
	Architecture        string              `xml:"architecture"`
	Placement           Placement           `xml:"placement"`
	ID                  string              `xml:"instanceId"`
	Lifecycle           string              `xml:"instanceLifecycle"`
	State               InstanceState       `xml:"instanceState"`
	Type                string              `xml:"instanceType"`
	Platform            string              `xml:"platform"`
	SubnetID            string              `xml:"subnetId"`
	PrivateDNSName      string              `xml:"privateDnsName"`
	PublicDNSName       string              `xml:"dnsName"`
	PublicIPAddress     string              `xml:"ipAddress"`
	VPCID               string              `xml:"vpcId"`
	NetworkInterfaceSet NetworkInterfaceSet `xml:"networkInterfaceSet"`
	TagSet              TagSet              `xml:"tagSet"`
}

// Placement represents Placement from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Placement.html
type Placement struct {
	AvailabilityZone string `xml:"availabilityZone"`
}

// InstanceState represents InstanceState from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceState.html
type InstanceState struct {
	Name string `xml:"name"`
}

// NetworkInterfaceSet represents NetworkInterfaceSet from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Instance.html
type NetworkInterfaceSet struct {
	Items []NetworkInterface `xml:"item"`
}

// NetworkInterface represents NetworkInterface from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceNetworkInterface.html
type NetworkInterface struct {
	SubnetID string `xml:"subnetId"`
}

// TagSet represents TagSet from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Instance.html
type TagSet struct {
	Items []Tag `xml:"item"`
}

// Tag represents Tag from https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Tag.html
type Tag struct {
	Key   string `xml:"key"`
	Value string `xml:"value"`
}
|
||||
|
||||
func parseInstancesResponse(data []byte) (*InstancesResponse, error) {
|
||||
var v InstancesResponse
|
||||
if err := xml.Unmarshal(data, &v); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal InstancesResponse from %q: %s", data, err)
|
||||
}
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int) []map[string]string {
|
||||
if len(inst.PrivateIPAddress) == 0 {
|
||||
// Cannot scrape instance without private IP address
|
||||
return ms
|
||||
}
|
||||
addr := discoveryutils.JoinHostPort(inst.PrivateIPAddress, port)
|
||||
m := map[string]string{
|
||||
"__address__": addr,
|
||||
"__meta_ec2_architecture": inst.Architecture,
|
||||
"__meta_ec2_availability_zone": inst.Placement.AvailabilityZone,
|
||||
"__meta_ec2_instance_id": inst.ID,
|
||||
"__meta_ec2_instance_lifecycle": inst.Lifecycle,
|
||||
"__meta_ec2_instance_state": inst.State.Name,
|
||||
"__meta_ec2_instance_type": inst.Type,
|
||||
"__meta_ec2_owner_id": ownerID,
|
||||
"__meta_ec2_platform": inst.Platform,
|
||||
"__meta_ec2_primary_subnet_id": inst.SubnetID,
|
||||
"__meta_ec2_private_dns_name": inst.PrivateDNSName,
|
||||
"__meta_ec2_private_ip": inst.PrivateIPAddress,
|
||||
"__meta_ec2_public_dns_name": inst.PublicDNSName,
|
||||
"__meta_ec2_public_ip": inst.PublicIPAddress,
|
||||
"__meta_ec2_vpc_id": inst.VPCID,
|
||||
}
|
||||
if len(inst.VPCID) > 0 {
|
||||
// Deduplicate VPC Subnet IDs maintaining the order of the network interfaces returned by EC2.
|
||||
subnets := make([]string, 0, len(inst.NetworkInterfaceSet.Items))
|
||||
seenSubnets := make(map[string]bool, len(inst.NetworkInterfaceSet.Items))
|
||||
for _, ni := range inst.NetworkInterfaceSet.Items {
|
||||
if len(ni.SubnetID) == 0 {
|
||||
continue
|
||||
}
|
||||
if !seenSubnets[ni.SubnetID] {
|
||||
seenSubnets[ni.SubnetID] = true
|
||||
subnets = append(subnets, ni.SubnetID)
|
||||
}
|
||||
}
|
||||
// We surround the separated list with the separator as well. This way regular expressions
|
||||
// in relabeling rules don't have to consider tag positions.
|
||||
m["__meta_ec2_subnet_id"] = "," + strings.Join(subnets, ",") + ","
|
||||
}
|
||||
for _, t := range inst.TagSet.Items {
|
||||
if len(t.Key) == 0 || len(t.Value) == 0 {
|
||||
continue
|
||||
}
|
||||
name := discoveryutils.SanitizeLabelName(t.Key)
|
||||
m["__meta_ec2_tag_"+name] = t.Value
|
||||
}
|
||||
ms = append(ms, m)
|
||||
return ms
|
||||
}
|
||||
219
lib/promscrape/discovery/ec2/instance_test.go
Normal file
219
lib/promscrape/discovery/ec2/instance_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
|
||||
)
|
||||
|
||||
// TestParseInstancesResponse checks that parseInstancesResponse extracts the expected
// fields from a sample DescribeInstances XML response and that appendTargetLabels
// generates the expected set of labels for the parsed instance.
func TestParseInstancesResponse(t *testing.T) {
	// Sample response with a single reservation holding a single instance.
	data := `<?xml version="1.0" encoding="UTF-8"?>
<DescribeInstancesResponse xmlns="http://ec2.amazonaws.com/doc/2013-10-15/">
<requestId>98667f8e-7fb6-441b-a612-41c6268c6399</requestId>
<reservationSet>
<item>
<reservationId>r-05534f81f74ea7036</reservationId>
<ownerId>793614593844</ownerId>
<groupSet/>
<instancesSet>
<item>
<instanceId>i-0e730b692d9c15460</instanceId>
<imageId>ami-0eb89db7593b5d434</imageId>
<instanceState>
<code>16</code>
<name>running</name>
</instanceState>
<privateDnsName>ip-172-31-11-152.eu-west-2.compute.internal</privateDnsName>
<dnsName>ec2-3-8-232-141.eu-west-2.compute.amazonaws.com</dnsName>
<reason/>
<keyName>my-laptop</keyName>
<amiLaunchIndex>0</amiLaunchIndex>
<productCodes/>
<instanceType>t2.micro</instanceType>
<launchTime>2020-04-27T09:19:26.000Z</launchTime>
<placement>
<availabilityZone>eu-west-2c</availabilityZone>
<groupName/>
<tenancy>default</tenancy>
</placement>
<monitoring>
<state>disabled</state>
</monitoring>
<subnetId>subnet-57044c3e</subnetId>
<vpcId>vpc-f1eaad99</vpcId>
<privateIpAddress>172.31.11.152</privateIpAddress>
<ipAddress>3.8.232.141</ipAddress>
<sourceDestCheck>true</sourceDestCheck>
<groupSet>
<item>
<groupId>sg-05d74e4e8551bd020</groupId>
<groupName>launch-wizard-1</groupName>
</item>
</groupSet>
<architecture>x86_64</architecture>
<rootDeviceType>ebs</rootDeviceType>
<rootDeviceName>/dev/sda1</rootDeviceName>
<blockDeviceMapping>
<item>
<deviceName>/dev/sda1</deviceName>
<ebs>
<volumeId>vol-0153ef24058482522</volumeId>
<status>attached</status>
<attachTime>2020-04-27T09:19:27.000Z</attachTime>
<deleteOnTermination>true</deleteOnTermination>
</ebs>
</item>
</blockDeviceMapping>
<virtualizationType>hvm</virtualizationType>
<clientToken/>
<tagSet>
<item>
<key>foo</key>
<value>bar</value>
</item>
</tagSet>
<hypervisor>xen</hypervisor>
<networkInterfaceSet>
<item>
<networkInterfaceId>eni-01d7b338ea037a60b</networkInterfaceId>
<subnetId>subnet-57044c3e</subnetId>
<vpcId>vpc-f1eaad99</vpcId>
<description/>
<ownerId>793614593844</ownerId>
<status>in-use</status>
<macAddress>02:3b:63:46:13:9a</macAddress>
<privateIpAddress>172.31.11.152</privateIpAddress>
<privateDnsName>ip-172-31-11-152.eu-west-2.compute.internal</privateDnsName>
<sourceDestCheck>true</sourceDestCheck>
<groupSet>
<item>
<groupId>sg-05d74e4e8551bd020</groupId>
<groupName>launch-wizard-1</groupName>
</item>
</groupSet>
<attachment>
<attachmentId>eni-attach-030cc2cdffe745682</attachmentId>
<deviceIndex>0</deviceIndex>
<status>attached</status>
<attachTime>2020-04-27T09:19:26.000Z</attachTime>
<deleteOnTermination>true</deleteOnTermination>
</attachment>
<association>
<publicIp>3.8.232.141</publicIp>
<publicDnsName>ec2-3-8-232-141.eu-west-2.compute.amazonaws.com</publicDnsName>
<ipOwnerId>amazon</ipOwnerId>
</association>
<privateIpAddressesSet>
<item>
<privateIpAddress>172.31.11.152</privateIpAddress>
<privateDnsName>ip-172-31-11-152.eu-west-2.compute.internal</privateDnsName>
<primary>true</primary>
<association>
<publicIp>3.8.232.141</publicIp>
<publicDnsName>ec2-3-8-232-141.eu-west-2.compute.amazonaws.com</publicDnsName>
<ipOwnerId>amazon</ipOwnerId>
</association>
</item>
</privateIpAddressesSet>
</item>
</networkInterfaceSet>
<ebsOptimized>false</ebsOptimized>
<instanceLifecycle>spot</instanceLifecycle>
<platform>windows</platform>
</item>
</instancesSet>
</item>
</reservationSet>
</DescribeInstancesResponse>
`
	ir, err := parseInstancesResponse([]byte(data))
	if err != nil {
		t.Fatalf("unexpected error when parsing data: %s", err)
	}
	// Only the fields listed below are expected to be populated by the parser.
	irExpected := &InstancesResponse{
		ReservationSet: ReservationSet{
			Items: []Reservation{
				{
					OwnerID: "793614593844",
					InstanceSet: InstanceSet{
						Items: []Instance{
							{
								PrivateIPAddress: "172.31.11.152",
								Architecture:     "x86_64",
								Placement: Placement{
									AvailabilityZone: "eu-west-2c",
								},
								ID:        "i-0e730b692d9c15460",
								Lifecycle: "spot",
								State: InstanceState{
									Name: "running",
								},
								Type:            "t2.micro",
								Platform:        "windows",
								SubnetID:        "subnet-57044c3e",
								PrivateDNSName:  "ip-172-31-11-152.eu-west-2.compute.internal",
								PublicDNSName:   "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
								PublicIPAddress: "3.8.232.141",
								VPCID:           "vpc-f1eaad99",
								NetworkInterfaceSet: NetworkInterfaceSet{
									Items: []NetworkInterface{
										{
											SubnetID: "subnet-57044c3e",
										},
									},
								},
								TagSet: TagSet{
									Items: []Tag{
										{
											Key:   "foo",
											Value: "bar",
										},
									},
								},
							},
						},
					},
				},
			},
		},
	}
	if !reflect.DeepEqual(ir, irExpected) {
		t.Fatalf("unexpected InstancesResponse parsed;\ngot\n%+v\nwant\n%+v", ir, irExpected)
	}

	// Verify the labels generated for the parsed instance.
	rs := ir.ReservationSet.Items[0]
	ownerID := rs.OwnerID
	port := 423
	inst := rs.InstanceSet.Items[0]
	labelss := inst.appendTargetLabels(nil, ownerID, port)
	// Sort labels in order to get a deterministic comparison with the expected labels.
	var sortedLabelss [][]prompbmarshal.Label
	for _, labels := range labelss {
		sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels))
	}
	expectedLabels := [][]prompbmarshal.Label{
		discoveryutils.GetSortedLabels(map[string]string{
			"__address__":                   "172.31.11.152:423",
			"__meta_ec2_architecture":       "x86_64",
			"__meta_ec2_availability_zone":  "eu-west-2c",
			"__meta_ec2_instance_id":        "i-0e730b692d9c15460",
			"__meta_ec2_instance_lifecycle": "spot",
			"__meta_ec2_instance_state":     "running",
			"__meta_ec2_instance_type":      "t2.micro",
			"__meta_ec2_owner_id":           "793614593844",
			"__meta_ec2_platform":           "windows",
			"__meta_ec2_primary_subnet_id":  "subnet-57044c3e",
			"__meta_ec2_private_dns_name":   "ip-172-31-11-152.eu-west-2.compute.internal",
			"__meta_ec2_private_ip":         "172.31.11.152",
			"__meta_ec2_public_dns_name":    "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
			"__meta_ec2_public_ip":          "3.8.232.141",
			"__meta_ec2_subnet_id":          ",subnet-57044c3e,",
			"__meta_ec2_tag_foo":            "bar",
			"__meta_ec2_vpc_id":             "vpc-f1eaad99",
		}),
	}
	if !reflect.DeepEqual(sortedLabelss, expectedLabels) {
		t.Fatalf("unexpected labels:\ngot\n%v\nwant\n%v", sortedLabelss, expectedLabels)
	}
}
|
||||
99
lib/promscrape/discovery/ec2/sign.go
Normal file
99
lib/promscrape/discovery/ec2/sign.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// newSignedRequest signed request for apiURL according to aws signature algorithm.
|
||||
//
|
||||
// See the algorithm at https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html
|
||||
func newSignedRequest(apiURL, service, region, accessKey, secretKey string) (*http.Request, error) {
|
||||
t := time.Now().UTC()
|
||||
return newSignedRequestWithTime(apiURL, service, region, accessKey, secretKey, t)
|
||||
}
|
||||
|
||||
func newSignedRequestWithTime(apiURL, service, region, accessKey, secretKey string, t time.Time) (*http.Request, error) {
|
||||
uri, err := url.Parse(apiURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse %q: %s", apiURL, err)
|
||||
}
|
||||
|
||||
// Create canonicalRequest
|
||||
amzdate := t.Format("20060102T150405Z")
|
||||
datestamp := t.Format("20060102")
|
||||
canonicalURL := uri.Path
|
||||
canonicalQS := uri.Query().Encode()
|
||||
canonicalHeaders := fmt.Sprintf("host:%s\nx-amz-date:%s\n", uri.Host, amzdate)
|
||||
signedHeaders := "host;x-amz-date"
|
||||
payloadHash := hashHex("")
|
||||
tmp := []string{
|
||||
"GET",
|
||||
canonicalURL,
|
||||
canonicalQS,
|
||||
canonicalHeaders,
|
||||
signedHeaders,
|
||||
payloadHash,
|
||||
}
|
||||
canonicalRequest := strings.Join(tmp, "\n")
|
||||
|
||||
// Create stringToSign
|
||||
algorithm := "AWS4-HMAC-SHA256"
|
||||
credentialScope := fmt.Sprintf("%s/%s/%s/aws4_request", datestamp, region, service)
|
||||
tmp = []string{
|
||||
algorithm,
|
||||
amzdate,
|
||||
credentialScope,
|
||||
hashHex(canonicalRequest),
|
||||
}
|
||||
stringToSign := strings.Join(tmp, "\n")
|
||||
|
||||
// Calculate the signature
|
||||
signingKey := getSignatureKey(secretKey, datestamp, region, service)
|
||||
signature := hmacHex(signingKey, stringToSign)
|
||||
|
||||
// Calculate autheader
|
||||
authHeader := fmt.Sprintf("%s Credential=%s/%s, SignedHeaders=%s, Signature=%s", algorithm, accessKey, credentialScope, signedHeaders, signature)
|
||||
|
||||
req, err := http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create request from %q: %s", apiURL, err)
|
||||
}
|
||||
req.Header.Set("x-amz-date", amzdate)
|
||||
req.Header.Set("Authorization", authHeader)
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func getSignatureKey(key, datestamp, region, service string) string {
|
||||
kDate := hmacBin("AWS4"+key, datestamp)
|
||||
kRegion := hmacBin(kDate, region)
|
||||
kService := hmacBin(kRegion, service)
|
||||
return hmacBin(kService, "aws4_request")
|
||||
}
|
||||
|
||||
// hashHex returns the hex-encoded SHA-256 digest of s.
func hashHex(s string) string {
	digest := sha256.Sum256([]byte(s))
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
|
||||
|
||||
func hmacHex(key, data string) string {
|
||||
digest := hmacBin(key, data)
|
||||
return hex.EncodeToString([]byte(digest))
|
||||
}
|
||||
|
||||
func hmacBin(key, data string) string {
|
||||
h := hmac.New(sha256.New, []byte(key))
|
||||
_, err := h.Write([]byte(data))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when writing to hmac: %s", err)
|
||||
}
|
||||
return string(h.Sum(nil))
|
||||
}
|
||||
27
lib/promscrape/discovery/ec2/sign_test.go
Normal file
27
lib/promscrape/discovery/ec2/sign_test.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package ec2
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestNewSignedRequest(t *testing.T) {
|
||||
f := func(apiURL string, authHeaderExpected string) {
|
||||
t.Helper()
|
||||
service := "ec2"
|
||||
region := "us-east-1"
|
||||
accessKey := "fake-access-key"
|
||||
secretKey := "foobar"
|
||||
ct := time.Unix(0, 0).UTC()
|
||||
req, err := newSignedRequestWithTime(apiURL, service, region, accessKey, secretKey, ct)
|
||||
if err != nil {
|
||||
t.Fatalf("error in newSignedRequest: %s", err)
|
||||
}
|
||||
authHeader := req.Header.Get("Authorization")
|
||||
if authHeader != authHeaderExpected {
|
||||
t.Fatalf("unexpected auth header;\ngot\n%s\nwant\n%s", authHeader, authHeaderExpected)
|
||||
}
|
||||
}
|
||||
f("https://ec2.amazonaws.com/?Action=DescribeRegions&Version=2013-10-15",
|
||||
"AWS4-HMAC-SHA256 Credential=fake-access-key/19700101/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-date, Signature=79dc8f54719a4c11edcd5811824a071361b3514172a3f5c903b7e279dfa6a710")
|
||||
}
|
||||
195
lib/promscrape/discovery/gce/api.go
Normal file
195
lib/promscrape/discovery/gce/api.go
Normal file
@@ -0,0 +1,195 @@
|
||||
package gce
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"golang.org/x/oauth2/google"
|
||||
)
|
||||
|
||||
// apiConfig contains the resolved settings for querying GCE API.
type apiConfig struct {
	// client is the http client used for API requests.
	client *http.Client

	// zones is the list of zones to query.
	zones []string

	// project is the GCE project to query.
	project string

	// filter is an optional filter passed to API requests.
	filter string

	// tagSeparator is the separator for tag lists.
	tagSeparator string

	// port is the port for discovered targets.
	port int
}
|
||||
|
||||
func getAPIConfig(sdc *SDConfig) (*apiConfig, error) {
|
||||
apiConfigMapLock.Lock()
|
||||
defer apiConfigMapLock.Unlock()
|
||||
|
||||
if !hasAPIConfigMapCleaner {
|
||||
hasAPIConfigMapCleaner = true
|
||||
go apiConfigMapCleaner()
|
||||
}
|
||||
|
||||
e := apiConfigMap[sdc]
|
||||
if e != nil {
|
||||
e.lastAccessTime = time.Now()
|
||||
return e.cfg, nil
|
||||
}
|
||||
cfg, err := newAPIConfig(sdc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
apiConfigMap[sdc] = &apiConfigMapEntry{
|
||||
cfg: cfg,
|
||||
lastAccessTime: time.Now(),
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func apiConfigMapCleaner() {
|
||||
tc := time.NewTicker(15 * time.Minute)
|
||||
for currentTime := range tc.C {
|
||||
apiConfigMapLock.Lock()
|
||||
for k, e := range apiConfigMap {
|
||||
if currentTime.Sub(e.lastAccessTime) > 10*time.Minute {
|
||||
delete(apiConfigMap, k)
|
||||
}
|
||||
}
|
||||
apiConfigMapLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
type apiConfigMapEntry struct {
|
||||
cfg *apiConfig
|
||||
lastAccessTime time.Time
|
||||
}
|
||||
|
||||
var (
|
||||
apiConfigMap = make(map[*SDConfig]*apiConfigMapEntry)
|
||||
apiConfigMapLock sync.Mutex
|
||||
hasAPIConfigMapCleaner bool
|
||||
)
|
||||
|
||||
func newAPIConfig(sdc *SDConfig) (*apiConfig, error) {
|
||||
ctx := context.Background()
|
||||
client, err := google.DefaultClient(ctx, "https://www.googleapis.com/auth/compute.readonly")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create oauth2 client for gce: %s", err)
|
||||
}
|
||||
project := sdc.Project
|
||||
if len(project) == 0 {
|
||||
proj, err := getCurrentProject()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot determine the current project; make sure `vmagent` runs inside GCE; error: %s", err)
|
||||
}
|
||||
project = proj
|
||||
logger.Infof("autodetected the current GCE project: %q", project)
|
||||
}
|
||||
zones := sdc.Zone.zones
|
||||
if len(zones) == 0 {
|
||||
// Autodetect the current zone.
|
||||
zone, err := getCurrentZone()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot determine the current zone; make sure `vmagent` runs inside GCE; error: %s", err)
|
||||
}
|
||||
zones = append(zones, zone)
|
||||
logger.Infof("autodetected the current GCE zone: %q", zone)
|
||||
} else if len(zones) == 1 && zones[0] == "*" {
|
||||
// Autodetect zones for project.
|
||||
zs, err := getZonesForProject(client, project, sdc.Filter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain zones for project %q: %s", project, err)
|
||||
}
|
||||
zones = zs
|
||||
logger.Infof("autodetected all the zones for the GCE project %q: %q", project, zones)
|
||||
}
|
||||
tagSeparator := ","
|
||||
if sdc.TagSeparator != nil {
|
||||
tagSeparator = *sdc.TagSeparator
|
||||
}
|
||||
port := 80
|
||||
if sdc.Port != nil {
|
||||
port = *sdc.Port
|
||||
}
|
||||
return &apiConfig{
|
||||
client: client,
|
||||
zones: zones,
|
||||
project: project,
|
||||
filter: sdc.Filter,
|
||||
tagSeparator: tagSeparator,
|
||||
port: port,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getAPIResponse(client *http.Client, apiURL, filter, pageToken string) ([]byte, error) {
|
||||
apiURL = appendNonEmptyQueryArg(apiURL, "filter", filter)
|
||||
apiURL = appendNonEmptyQueryArg(apiURL, "pageToken", pageToken)
|
||||
resp, err := client.Get(apiURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot query %q: %s", apiURL, err)
|
||||
}
|
||||
return readResponseBody(resp, apiURL)
|
||||
}
|
||||
|
||||
// readResponseBody reads the whole body from resp, closes it and returns the data read.
//
// An error is returned if the body cannot be read or if the response status code
// differs from 200 OK. In the latter case the error contains the response body.
// The read error wraps the original cause, so it can be inspected with errors.Is / errors.As.
//
// apiURL is used only in error messages.
func readResponseBody(resp *http.Response, apiURL string) ([]byte, error) {
	data, err := ioutil.ReadAll(resp.Body)
	// The body has been already consumed, so the error from Close is of no interest.
	_ = resp.Body.Close()
	if err != nil {
		return nil, fmt.Errorf("cannot read response from %q: %w", apiURL, err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code for %q; got %d; want %d; response body: %q",
			apiURL, resp.StatusCode, http.StatusOK, data)
	}
	return data, nil
}
|
||||
|
||||
// appendNonEmptyQueryArg returns apiURL with the query-escaped argName=argValue pair appended.
//
// apiURL is returned as is if argValue is empty. The pair is joined with `&`
// if apiURL already contains `?`, otherwise it is joined with `?`.
func appendNonEmptyQueryArg(apiURL, argName, argValue string) string {
	if argValue == "" {
		return apiURL
	}
	sep := "?"
	if strings.IndexByte(apiURL, '?') >= 0 {
		sep = "&"
	}
	return apiURL + sep + url.QueryEscape(argName) + "=" + url.QueryEscape(argValue)
}
|
||||
|
||||
func getCurrentZone() (string, error) {
|
||||
// See https://cloud.google.com/compute/docs/storing-retrieving-metadata#default
|
||||
data, err := getGCEMetadata("instance/zone")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
parts := strings.Split(string(data), "/")
|
||||
if len(parts) != 4 {
|
||||
return "", fmt.Errorf("unexpected data returned from GCE; it must contain something like `projects/projectnum/zones/zone`; data: %q", data)
|
||||
}
|
||||
return parts[3], nil
|
||||
}
|
||||
|
||||
func getCurrentProject() (string, error) {
|
||||
// See https://cloud.google.com/compute/docs/storing-retrieving-metadata#default
|
||||
data, err := getGCEMetadata("project/project-id")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
func getGCEMetadata(path string) ([]byte, error) {
|
||||
// See https://cloud.google.com/compute/docs/storing-retrieving-metadata#default
|
||||
metadataURL := "http://metadata.google.internal/computeMetadata/v1/" + path
|
||||
req, err := http.NewRequest("GET", metadataURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create http request for %q: %s", metadataURL, err)
|
||||
}
|
||||
req.Header.Set("Metadata-Flavor", "Google")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain response to %q: %s", metadataURL, err)
|
||||
}
|
||||
return readResponseBody(resp, metadataURL)
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user