Compare commits

...

152 Commits

Author SHA1 Message Date
Roman Khavronenko
3bfa41a95c app/vmalert: initial remote-write support for alerts state persistence. (#442)
* app/vmalert: initial remote-write support for alerts state persistence.

If `remotewrite.url` flag is set, vmalert will send alerts state  via remote-write protocol to remote storage. The sending is asynchronous to avoid blocking calls in rules evaluation loop.

* app/vmalert: merge with master

* app/vmalert: write both `instant` and `for` alerts timeseries states in remote storage.
2020-04-28 00:18:02 +03:00
Aliaksandr Valialkin
90670cb55e app/vmalert: include it into the next release 2020-04-28 00:10:12 +03:00
Aliaksandr Valialkin
303905cd84 lib/{encoding,decimal}: typo fixes in tests: epxecting->expecting 2020-04-28 00:01:55 +03:00
Aliaksandr Valialkin
36fa3078c2 lib/encoding: reduce possibility of failure in TestMarshalInt64ArraySize 2020-04-28 00:01:54 +03:00
Aliaksandr Valialkin
95942f1ac6 lib/promscrape/discovery/gce: make golangci-lint happy 2020-04-27 19:28:10 +03:00
Aliaksandr Valialkin
b768bc9a6a lib/promscrape: add initial support for Prometheus-compatible service discovery for Amazon EC2 aka ec2_sd_configs 2020-04-27 19:25:53 +03:00
Aliaksandr Valialkin
de59703a16 lib/promscrape/discovery/gce: properly set filter query arg in api url 2020-04-27 16:01:17 +03:00
Aliaksandr Valialkin
b4afe562c1 lib/storage: postpone reading data from blocks during search
This eliminates the need for storing block data into temporary files on a single-node VictoriaMetrics
during heavy queries, which touch big number of time series over long time ranges.

This improves single-node VM performance on heavy queries by up to 2x.
2020-04-27 11:45:24 +03:00
Aliaksandr Valialkin
0224071ebe lib/promscrape/discovery/gce: allow empty project and zone for gce_sd_config 2020-04-27 11:45:02 +03:00
Aliaksandr Valialkin
fcf57f9883 app/vmselect/netstorage: substitute sorting packedTimeseries with the natural order of the fetched blocks
This should minimize the number of disk seeks when reading data from temporary file.
2020-04-26 16:26:23 +03:00
Aliaksandr Valialkin
6954d0edb7 lib/promscrape/discovery/gce: allow empty zone arg in gce_sd_config - in this case zones for the given project are automatically discovered 2020-04-26 14:34:11 +03:00
kreedom
fb967ae6c8 happy fmt 2020-04-26 14:16:32 +03:00
kreedom
2c18548e08 alert - rename validate function and flags (#440)
* alert - rename validate function and flags
2020-04-26 14:15:04 +03:00
kreedom
5f61d43db9 vmalert - validate template in labels (#439) 2020-04-26 13:53:57 +03:00
肖贝贝
eeadfccdc5 fix: fix vmalert template label not complete bug (#435)
Co-authored-by: xiaobeibei <xiaobeibei@bigo.sg>
2020-04-26 13:30:10 +03:00
Aliaksandr Valialkin
d7c1ff8b0c lib/storage: improve deduplication algorithm
Now it leaves only the first data point on each `-dedup.minScrapeInterval` interval.

Previously it may leave two data points on the interval. This could lead to unexpected results
for `histogram_quantile(phi, sum(rate(buckets)) by (le))` query.
2020-04-26 13:10:02 +03:00
Aliaksandr Valialkin
1f3fd93b58 docs/{vmbackup,vmrestore}.md: update -help output 2020-04-24 22:44:21 +03:00
Jason Gardner
66af7e40f3 app/vmbackup: added ability to create and delete snapshots during backup (#428)
* app/vmbackup: added ability to create and delete snapshots during backup

Resolves: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/422

* Add snapshot create and delete url flags

* Fixed errcheck warnings in build
2020-04-24 22:35:03 +03:00
Aliaksandr Valialkin
491b31b369 lib/storage: postpone label filters matching too many time series instead of giving up with error
This should reduce the frequency of the following errors:

    cannot find tag filter matching less than N time series; either increase -search.maxUniqueTimeseries or use more specific tag filters

    more than N time series found on the time range [...]; either increase -search.maxUniqueTimeseries or shrink the time range
2020-04-24 21:13:50 +03:00
Aliaksandr Valialkin
4b84c592e9 docs/Single-server-VictoriaMetrics.md: document -search.resetCacheAuthKey 2020-04-24 19:47:52 +03:00
Aliaksandr Valialkin
a596aec82c app/vmselect: fix description for -search.resetCacheAuthKey 2020-04-24 19:45:50 +03:00
Aliaksandr Valialkin
7b8008e0bd lib/promscrape/discovery/gce: make golint happy by ignoring resp.Body.Close() result 2020-04-24 18:13:09 +03:00
Aliaksandr Valialkin
6d3567d65c .github/workflows: install dependencies before code checkout
Othwerise dependencies' install mangles go.mod
2020-04-24 17:55:17 +03:00
Aliaksandr Valialkin
9ef5935552 lib/promscrape: initial implementation for gce_sd_configs aga Prometheus-compatible service discovery for Google Compute Engine 2020-04-24 17:51:22 +03:00
Aliaksandr Valialkin
b80e6b4d56 .github/workflows: enable Go modules when installing dependencies
Disabled Go modules broke golangci-lint build
2020-04-24 17:39:58 +03:00
Aliaksandr Valialkin
5f9c23226a docs/Single-server-VictoriaMetrics.md: mention that -search.maxStalenessInterval can be useful for InfluxDB and TimescaleDB users 2020-04-24 16:22:50 +03:00
Aliaksandr Valialkin
ac43075cc9 .github/workflows: install golangci-lint at Dependencies step 2020-04-24 15:37:35 +03:00
Aliaksandr Valialkin
3157fb0186 .github/workflows: update Go version in actions/setup-go from v1.13 to v1.14 2020-04-24 15:31:16 +03:00
Aliaksandr Valialkin
e48822942d vendor: make vendor-update 2020-04-24 15:27:45 +03:00
Aliaksandr Valialkin
77bea69fab .github/workflows: use master branch for 'actions/setup-go' and 'actions/checkout' 2020-04-24 14:41:21 +03:00
Aliaksandr Valialkin
24461153bf lib/promscrape: query /api/v1/namespaces/* for the configured namespaces in kubernetes_sd_config
This should fix authroization issues described at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/432
2020-04-24 14:33:50 +03:00
Aliaksandr Valialkin
00e897119f lib/promscrape: add -promscrape.configCheckInterval command-line flag for automating config checking
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/431
2020-04-23 23:41:08 +03:00
Aliaksandr Valialkin
a9a7a7175e lib/promscrape: access Config entries by reference, so they can be compared by addresses 2020-04-23 14:38:20 +03:00
Aliaksandr Valialkin
a9b83bf512 vendor: update google.golang.org/api from v0.21.0 to v0.22.0 2020-04-23 14:30:46 +03:00
Aliaksandr Valialkin
a87ca3bdf0 vendor: update github.com/aws/aws-sdk-go from v1.30.8 to v1.30.12 2020-04-23 12:36:03 +03:00
Aliaksandr Valialkin
1c5d14a2eb lib/promscrape: move KubernetesSDConfig to lib/promscrape/discovery/kubernetes 2020-04-23 11:34:22 +03:00
Aliaksandr Valialkin
a714568374 lib/promscrape/discovery/kubernetes: hide role switch logic behind GetLabels function 2020-04-22 22:16:11 +03:00
Aliaksandr Valialkin
364db13c9c app/vmselect: add /api/v1/status/tsdb page with useful stats for locating root cause for high cardinality issues
See https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/425
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/268
2020-04-22 22:03:43 +03:00
Aliaksandr Valialkin
01e33be34a vendor: update github.com/valyala/fastjson from v1.5.0 to v1.5.1 2020-04-21 00:03:56 +03:00
Aliaksandr Valialkin
78ff5f2aa5 vendor: update github.com/valyala/gozstd from v1.6.4 to v1.7.0 2020-04-20 23:03:40 +03:00
Aliaksandr Valialkin
2dc5593b75 lib/writeconcurrencylimiter: improve docs for -maxConcurrentInserts command-line flag 2020-04-20 21:03:00 +03:00
Aliaksandr Valialkin
9ebc937685 app/vmselect: add -search.minStalenessInterval command-line flag for removing gaps on graphs built from time series with irregular duration between samples
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/426
2020-04-20 19:42:15 +03:00
Aliaksandr Valialkin
fe57d46687 app/vmselect: merge -search.maxLookback and -search.maxStalenessInterval flags, since it has been appeared they have identical purpose :(
Leave both flags for backwards compatibility reasons.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/209
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/426
2020-04-20 19:26:31 +03:00
Aliaksandr Valialkin
6cc6ec6d2e deployment/docker/docker-compose.yml: bump Prometheus from v2.17.1 to v2.17.2 and Grafana from v6.7.1 to v6.7.2 2020-04-20 17:29:20 +03:00
Aliaksandr Valialkin
5454b518a6 lib/promscrape/discovery/kubernetes: reuse a client for empty api_server inside different jobs 2020-04-20 17:07:11 +03:00
Aliaksandr Valialkin
5ecb50d7c2 docs/Single-server-VictoriaMetrics.md: mention about vmagent in the end of Prometheus setup section 2020-04-20 16:41:36 +03:00
Aliaksandr Valialkin
851946af1e deployment/docker: allow building docker images on top of any base image set via ROOT_IMAGE environment var
For example, the following command will build VictoriaMetrics docker image on top of alpine image:

    ROOT_IMAGE=alpine make package-victoria-metrics
2020-04-20 01:16:57 +03:00
Aliaksandr Valialkin
2de76bca96 deployment/docker/base: remove unused group and passwd files 2020-04-19 23:31:31 +03:00
Aliaksandr Valialkin
94ad531bfe Makefile: increase the timeout for make golangci-lint from 1 minute to 2 minutes
This should fix timeout errors on GitHub actions
2020-04-17 19:14:04 +03:00
Aliaksandr Valialkin
936fb0eac3 app/vmagent/remotewrite: retry sending data if the server closes keep-alive connection
This should fix the following error when sending data to remote storage:

couldn't send a block with size XX bytes to "YYY": the server closed connection before returning the first response byte. Make sure the server returns 'Connection: close' response header before closing the connection
2020-04-17 15:52:42 +03:00
Aliaksandr Valialkin
43375df923 lib/promscrape/discovery/kubernetes: update stale comments 2020-04-17 14:06:20 +03:00
Aliaksandr Valialkin
43bbffebb3 vendor: make vendor-update 2020-04-17 13:24:08 +03:00
Aliaksandr Valialkin
79fb595732 docs/vmagent.md: typo fix: unvailable -> unavailable 2020-04-17 13:11:31 +03:00
Aliaksandr Valialkin
546d26523c app/vmagent/README.md: mention about prodmscrape.suppressScrapeErrors 2020-04-17 13:08:21 +03:00
Aliaksandr Valialkin
f41e6a7bd9 app/vmselect: properly apply -search.maxLookback to queries sent to /api/v1/query 2020-04-17 12:30:11 +03:00
Dmitry Shihovtsev
830538e290 Fix misspelled Cortex name in the FAQ (#421) 2020-04-17 08:36:12 +01:00
Aliaksandr Valialkin
5d1537a395 lib/promscrape: suppress scrape errors if -promscrape.suppressScrapeErrors flag is set 2020-04-16 23:41:30 +03:00
Aliaksandr Valialkin
600490131f lib/promscrape: print all the labels for the target on error message for failed scrape
This should improve debuggability.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/420
2020-04-16 23:35:05 +03:00
Aliaksandr Valialkin
bd4c6d21dd lib/promscrape: retry target scraping when the target closes previously established keep-alive connection to it
This should fix the following error:

the server closed connection before returning the first response byte. Make sure the server returns 'Connection: close' response header before closing the connection
2020-04-16 23:25:29 +03:00
Aliaksandr Valialkin
95da8d410c docs/Single-server-VictoriaMetrics.md: mention that VictoriaMetrics supports Kubernetes service discovery 2020-04-16 18:40:11 +03:00
Aliaksandr Valialkin
bcec5c5429 docs/Single-server-VictoriaMetrics.md: typo fix: unneded -> unneeded 2020-04-16 17:35:08 +03:00
Aliaksandr Valialkin
467279acd2 docs/Single-server-VictoriaMetrics.md: imrpove docs about metrics deletion 2020-04-16 17:32:09 +03:00
Aliaksandr Valialkin
e0d213f82b docs/Single-server-VictoriaMetrics.md: mention that the delete API can be protected by authKey 2020-04-16 17:19:10 +03:00
Aliaksandr Valialkin
2fd2dec5eb lib/logger: typo fix 2020-04-16 00:19:10 +03:00
Aliaksandr Valialkin
071fdf5518 lib/logger: add WARN level for logging expected errors such as invalid user queries 2020-04-15 20:50:26 +03:00
Aliaksandr Valialkin
30b401ebbf docs/Single-server-VictoriaMetrics.md: typo fix 2020-04-15 15:21:58 +03:00
Aliaksandr Valialkin
a59a7bcc5e vendor: make vendor-update 2020-04-15 14:52:24 +03:00
Aliaksandr Valialkin
ccb887c0f6 docs/Single-server-VictoriaMetrics.md: clarify how to use -influxListenAddr command-line option 2020-04-15 12:33:42 +03:00
Aliaksandr Valialkin
6f7f64f757 app/vmselect: handle timestamp(metric offset X) the same way as Prometheus does
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
2020-04-15 12:01:00 +03:00
Aliaksandr Valialkin
426a0567c4 lib/promscrape: code cleanup in runScraper func 2020-04-15 11:36:24 +03:00
Aliaksandr Valialkin
6e2f6574b8 docs/Single-server-VictoriaMetrics.md: mention that backfilling can be done via any supported ingestion method 2020-04-15 10:56:53 +03:00
Aliaksandr Valialkin
c1de3f67b4 lib/storage: skip metricID if the corresponding metricID->metricName is missing in inverted index during search
This case is possible when the corresponding metricID->metricName entry didn't propagate to inverted index yet.

This should fix the following error:

error when searching tsids for tfss [...]: cannot find metricName by metricID 1582417212213420669: EOF
2020-04-15 00:06:43 +03:00
Aliaksandr Valialkin
8a25c1ed71 docs/Single-server-VictoriaMetrics.md: add https://github.com/Slapper/ansible-victoriametrics-cluster-role to integrations chapter 2020-04-14 16:27:20 +03:00
Aliaksandr Valialkin
067c7afebc lib/promscrape: show information on improperly configured scrape targets at the bottom of /targets page
This is a common error whith improperly configured target autodiscovery and/or relabeling.
This error leads to duplicate scraping of the same targets with the same set of labels, which leads
to duplicate samples in time series.
2020-04-14 14:55:05 +03:00
Aliaksandr Valialkin
ac35635b71 lib/promscrape/discovery/kubernetes: remove only unused client for API server during cleaning 2020-04-14 14:19:21 +03:00
Aliaksandr Valialkin
78863d7066 lib/promscrape: add promrelabel.GetLabelValueByName helper function 2020-04-14 14:12:01 +03:00
Aliaksandr Valialkin
c64f003cfb lib/promscrape: mention job name in error messages when target cannot be scraped
This should improve debuggability
2020-04-14 13:33:13 +03:00
Aliaksandr Valialkin
4718a5d951 lib/promscrape: reset ScrapeWork.ID in tests 2020-04-14 13:31:31 +03:00
Aliaksandr Valialkin
257521a634 lib/promscrape: properly expose statuses for targets with duplicate scrape urls at /targets page
Previously targets with duplicate scrape urls were merged into a single line on the page.
Now each target with duplicate scrape url is displayed on a separate line.
2020-04-14 13:10:01 +03:00
Aliaksandr Valialkin
6a75c95194 lib/promscrape: remove labels starting with __meta_ after applying relabel_configs as Prometheus does
This should reduce CPU load during scraping when target discovery generates
big number of `__meta_*` labels (for instance, k8s discovery).

See https://www.robustperception.io/life-of-a-label for details.
2020-04-14 12:23:22 +03:00
Aliaksandr Valialkin
01d7d799dc lib/promscrape: rename 'scrape_config->scrape_limit' to 'scrape_config->sample_limit'
`scrape_config` block from Prometheus config contains `sample_limit` field,
while in `vmagent` this field was mistakenly named as `scrape_limit`.
2020-04-14 11:59:57 +03:00
Aliaksandr Valialkin
0b76c27fa1 docs/vmagent.md: mention that vmagent supports kubernetes_sd_configs now 2020-04-13 21:06:36 +03:00
Aliaksandr Valialkin
2e4e202c2b lib/promscrape: add initial support for kubernetes_sd_config
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/334
2020-04-13 21:03:28 +03:00
Aliaksandr Valialkin
2814b1490f lib/promscrape: add -promscrape.config.strictParse flag for detecting errors in -promscrape.config file 2020-04-13 13:15:44 +03:00
Aliaksandr Valialkin
90b4a6dd12 lib/promscrape: extract common auth code to lib/promauth 2020-04-13 12:59:10 +03:00
hagen1778
2eed6c393f vmalert: prepare package for external usage
* update README according to changes
* add Makefile with basic commands
2020-04-12 15:32:42 +03:00
kreedom
948f8b6b5f [vmalert] fix linter issues 2020-04-12 15:08:11 +03:00
kreedom
8fca5f2819 [vmalert] add tests to webserver (#413) 2020-04-12 14:51:03 +03:00
Roman Khavronenko
7c9405f53d Vmalert metrics (#412)
vmalert: add basic list of metrics
2020-04-11 20:42:01 +01:00
Roman Khavronenko
9f8cc8ae1b Extend web responses for alerts: (#411)
vmalert: Extend web responses for alerts

* populate apiAlert object with additional fields
* return all active alerts, not only firing
* sort list of API alerts for deterministic output
* add helper for available path list
2020-04-11 16:49:23 +01:00
kreedom
90de3086b3 [vmalert] add webserver (#410)
* [vmalert] add webserver
2020-04-11 12:40:24 +03:00
Aliaksandr Valialkin
830d5fb1e0 vendor: make vendor-update 2020-04-10 18:40:21 +03:00
Aliaksandr Valialkin
66d8086a5e vendor: update github.com/klauspost/compress from v1.10.3 to v1.10.4 2020-04-10 18:39:19 +03:00
Aliaksandr Valialkin
a30c98c0bc deployment/docker: update Go builder image from go1.14.1 to go1.14.2 2020-04-10 18:19:34 +03:00
Aliaksandr Valialkin
4de6c6bbf0 lib/storage: disable deduplication after dedup tests are complete
The rest of tests expect that the de-duplication is disabled.
2020-04-10 17:28:31 +03:00
Aliaksandr Valialkin
ded0c0d3c7 lib/storage: correctly handle -dedup.minScrapeInterval values smaller than 8ms
Such small values may be used for removing samples with duplicate timestamps.
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/409 for details.
2020-04-10 16:36:41 +03:00
Aliaksandr Valialkin
7d73623c69 lib/{storage,mergeset}: make sure that requests and misses cache counters never go down 2020-04-10 14:45:01 +03:00
Aliaksandr Valialkin
e62afc7366 lib/protoparser: add -*TrimTimstamp command-line flags for Influx, Graphite, OpenTSDB and CSV data
These flags can be used for reducing disk space usage for timestamps data ingested over the given protocols
2020-04-10 12:44:39 +03:00
Aliaksandr Valialkin
0681b4c27a lib/workingsetcache: accumulate stat counters on cache rotation
This should prevent from cache stats counters going down after cache rotation,
which may corrupt `cache hit ratio` graph on the official Grafan dasbhoards
when using the following query:

    1 - (sum(rate(vm_cache_misses_total[5m])) by (type) / sum(rate(vm_cache_requests_total[5m])) by (type))
2020-04-10 11:51:40 +03:00
Aliaksandr Valialkin
f86947d55c lib/memory: add more details to -memory.allowedPercent help message 2020-04-09 15:28:53 +03:00
Aliaksandr Valialkin
f94a090020 docs: update minimum supported Go version from 1.12 to 1.13 2020-04-07 13:38:37 +03:00
Aliaksandr Valialkin
8064775c02 docs/CaseStudies.md: updated ARNES numbers 2020-04-06 16:20:11 +03:00
Aliaksandr Valialkin
520a704606 docs/CaseStudies.md: prettifying of the formatting 2020-04-06 15:24:37 +03:00
Aliaksandr Valialkin
105f0c78d9 docs/CaseStudies.md: add ARNES case study 2020-04-06 15:17:33 +03:00
Roman Khavronenko
b099d84271 Vmalert/rules eval (#400)
* Initial rules evaluation support.

Rules are now store alerts state in private field `alerts`. Every evaluation updates
the alerts and state. Every unique metric received from datastore represents a unique alert,
uniqueness is guaranteed by hashing ordered labelset.

* merge with master

* cleanup

* support endAt parameter as 3*evaluationInterval for active alerts

* make golint happy
2020-04-06 14:44:03 +03:00
Aliaksandr Valialkin
407bdbf2b9 docs/Single-server-VictoriaMetrics.md: cosmetic fixes in Importing CSV data chapter 2020-04-06 12:29:28 +03:00
Aliaksandr Valialkin
69962a7001 docs/FAQ.md: small fixes 2020-04-05 13:53:08 +03:00
Aliaksandr Valialkin
9f03548e55 docs/FAQ.md: add more articles about VictoriaMetrics performance 2020-04-05 13:48:03 +03:00
Aliaksandr Valialkin
022310f35b docs/Articles.md: added a link to https://www.iunera.com/kraken/fabric/time-series-database/ 2020-04-04 16:40:00 +03:00
Aliaksandr Valialkin
895cadfae7 app/vmagent/remotewrite: add "X-Prometheus-Remote-Write-Version: 0.1.0" http header to remote_write request
This header is required by Cortex (and, probably, other remote storage systems).
See 9c1f44d090/docs/apis.md (remote-api) .

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/399
2020-04-04 16:24:56 +03:00
Aliaksandr Valialkin
57704aa584 app/victoria-metrics: add -selfScrapeInstance and -selfScrapeJob flags for tuning labels for self-scraped metrics 2020-04-04 14:57:22 +03:00
Aliaksandr Valialkin
f9b24d4899 app/vmselect/promql: keep metric name after applying first_over_time and last_over_time functions 2020-04-04 14:54:13 +03:00
Aliaksandr Valialkin
fa0554b771 docs/Articles.md: move Percona article to third-party 2020-04-02 15:43:02 +03:00
Aliaksandr Valialkin
35b133bff4 docs/Articles.md: add a link to https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/ 2020-04-02 15:41:09 +03:00
Aliaksandr Valialkin
a884803377 docs/CaseStudies.md: add Adsterra case 2020-04-02 00:49:16 +03:00
Aliaksandr Valialkin
b38d048dd9 app/vmstorage: add vm_free_disk_space_bytes metric for monitoring the remaining disk space at -storageDataPath 2020-04-01 23:08:58 +03:00
Aliaksandr Valialkin
de2cd4231b docs/Single-server-VictoriaMetrics.md: re-organize chapters 2020-04-01 22:38:56 +03:00
kreedom
298eb0a0f8 [vmalert] improve external url handling 2020-04-01 22:29:11 +03:00
kreedom
12fe915b48 [vmalert] add prometheus template function (#396)
* [vmalert] add prometheus template function

* make linter be happy

Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com>
2020-04-01 18:17:53 +03:00
Aliaksandr Valialkin
cdf0a4cf8f lib/httpserver: remove unnecessary http.HandlerFunc wrapper in gzipHandler 2020-04-01 18:14:17 +03:00
Aliaksandr Valialkin
1c9c57db1c docs/Cluster-VictoriaMetrics.md: small fixes and updates 2020-04-01 18:10:12 +03:00
Aliaksandr Valialkin
8edc72201d docs/Single-server-VictoriaMetrics.md: small fixes and updates 2020-04-01 18:09:07 +03:00
Aliaksandr Valialkin
b024ecd10c docs/Cluster-VictoriaMetrics.md: swap production build and development build chapters 2020-04-01 17:49:51 +03:00
Aliaksandr Valialkin
e0d0348f36 lib/storage: add missing reset for tagFilter.matchesEmptyValue on tagFilter.Init 2020-04-01 17:42:44 +03:00
Aliaksandr Valialkin
3e55c7e069 lib/promscrape: reduce timestamp jitter when scraping targets
This should improve compression for timestamps
2020-04-01 16:11:35 +03:00
Aliaksandr Valialkin
c4acd20d2a lib/storage: remove duplicate data points on 7/8*minScrapeInterval interval instead of 1/2*minScrapeInterval
This should reduce storage usage and should improve deduplication accuracy
2020-04-01 15:48:48 +03:00
Aliaksandr Valialkin
8661dc4624 docs/Single-server-VictoriaMetrics.md: mention that environment vars may be prefixed with -envflag.prefix 2020-03-31 22:37:44 +03:00
Aliaksandr Valialkin
16572c8722 README.md: mention that response cache must be reset after import historical data 2020-03-31 19:33:20 +03:00
Aliaksandr Valialkin
b699c46046 lib/storage: handle errors returned from TagFilters.Add when cloning TagFilters with negative filter 2020-03-31 16:18:02 +03:00
Aliaksandr Valialkin
e71519b8b2 app/victoria-metrics/testdata: add a test for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395 2020-03-31 12:51:25 +03:00
Aliaksandr Valialkin
972713bd79 lib/storage: add fast path for the previous indexdb search if it doesn't contain per-day inverted index yet 2020-03-31 12:51:21 +03:00
Aliaksandr Valialkin
5d99ca6cfc lib/storage: optimize per-day inverted index search for tag filters matching big number of time series
- Sort tag filters in the ascending number of matching time series
  in order to apply the most specific filters first.
- Fall back to metricName search for filters matching big number of time series
  (usually this are negative filters or regexp filters).
2020-03-31 00:48:35 +03:00
Aliaksandr Valialkin
318326c309 lib/storage: properly handle {label=~"foo|"} filters as Prometheus does
Such filters must match all the time series with `label="foo"` plus all the time series without `label`

Previously only time series with `label="foo"` were matched.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395
2020-03-31 00:48:18 +03:00
Aliaksandr Valialkin
a1e4c6a2be .github/workflows/wiki.yml: fix copying files from docs to wiki 2020-03-30 15:59:12 +03:00
Aliaksandr Valialkin
ac3ee44fa7 docs/robots.txt: trigger github actions 2020-03-30 15:54:39 +03:00
Aliaksandr Valialkin
b98ca56d94 lib/envflag: add -envflag.prefix for setting optional prefix for environment vars 2020-03-30 15:51:19 +03:00
Aliaksandr Valialkin
b41ee5f27d vendor: make vendor-update 2020-03-30 15:06:35 +03:00
Aliaksandr Valialkin
8d35af6fdb .github/workflows: copy all the files from docs folder to wiki and github pages 2020-03-30 15:05:37 +03:00
Aliaksandr Valialkin
0f2dd77a76 go.mod: update the minimum required Go version from go1.12 to go1.13 2020-03-30 14:56:57 +03:00
Aliaksandr Valialkin
0c485f14d1 app/vmselect/prometheus: allow passing relative time to start, end and time args of /api/v1/* queries 2020-03-29 21:57:14 +03:00
Aliaksandr Valialkin
2ebf7d86ff app/vmselect/prometheus: code simplification: (d.Seconds()/1e3) -> d.Milliseconds() 2020-03-29 21:50:28 +03:00
kreedom
bf6c24d0f4 [vmalert] config parser (#393)
* [vmalert] config parser

* make linter be happy

* fix test

* fix sprintf add test for rule validation
2020-03-29 01:48:30 +02:00
Aliaksandr Valialkin
1f7292675a docs: add robots.txt 2020-03-28 23:22:46 +02:00
Aliaksandr Valialkin
bd156cd088 docs/vmagent.md: add prometheus remote_write proxy use case 2020-03-28 23:16:38 +02:00
Aliaksandr Valialkin
b695087119 docs/CaseStudies.md: add Brandwatch case study 2020-03-28 20:57:54 +02:00
Aliaksandr Valialkin
80f53e5396 deployment/docker: run docker apps under default user (0, root) in order to preserve backwards compatibility
If docker app is upgraded from root to non-root, then the data pointed by `-storageDataPath` or similar flags
becomes denied to non-root user after the upgrade. This breaks upgrade path. So revert back to default root user
for docker apps.

Users may explicitly execute `docker run --user <non_root_user>` for running docker apps under non-root user.
2020-03-28 19:23:26 +02:00
Roman Khavronenko
7acb797595 Update dashboard according to new Grafana version. (#390)
The way how regex for column style in Table panel should be applied has changed in 6.7 Grafana version. The change supposed to fix Flags panel column styles accordingly.
2020-03-28 01:24:39 +02:00
Roman Khavronenko
3a8bbfd6b9 bump Prometheus and Grafana images (#389) 2020-03-28 01:15:07 +02:00
Dmitry Naumov
27373807c1 Rootless docker images by default (#358)
* Rootless docker images by default

* Migrate to rootless base image

Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com>
2020-03-27 21:23:50 +02:00
Aliaksandr Valialkin
8d7f0aa632 vendor: make vendor-update 2020-03-27 21:23:30 +02:00
Aliaksandr Valialkin
149f365f74 lib/httpserver: add -http.maxGracefulShutdownDuration command-line flag for tuning the maximum duration required for graceful shutdown of http server 2020-03-27 21:23:30 +02:00
kreedom
b22da547a2 [vmalert] - parse template annotaions (#387)
* [vmalert] - parse template annotations
2020-03-27 18:31:16 +02:00
497 changed files with 56544 additions and 23756 deletions

View File

@@ -2,7 +2,7 @@ name: github-pages
on:
push:
paths:
- 'docs/*.md'
- 'docs/*'
- 'README.md'
branches:
- master
@@ -17,14 +17,14 @@ jobs:
TOKEN: ${{secrets.CI_TOKEN}}
run: |
git clone https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.github.io.git gpages
cp docs/*.md gpages
cp docs/* gpages
cp README.md gpages
cd gpages
git config --local user.email "info@victoriametrics.com"
git config --local user.name "Vika"
git add "*.md"
git add .
git commit -m "update github pages"
remote_repo="https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.github.io.git"
git push "${remote_repo}"
cd ..
rm -rf gpages
rm -rf gpages

View File

@@ -14,18 +14,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Setup Go
uses: actions/setup-go@v1
uses: actions/setup-go@master
with:
go-version: 1.13
go-version: 1.14
id: go
- name: Code checkout
uses: actions/checkout@v1
- name: Dependencies
env:
GO111MODULE: off
GO111MODULE: on
run: |
go get -v golang.org/x/lint/golint
go get -u golang.org/x/lint/golint
go get -u github.com/kisielk/errcheck
go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
- name: Code checkout
uses: actions/checkout@master
- name: Build
env:
GO111MODULE: on

View File

@@ -2,7 +2,7 @@ name: wiki
on:
push:
paths:
- 'docs/*.md'
- 'docs/*'
branches:
- master
jobs:
@@ -15,15 +15,14 @@ jobs:
env:
TOKEN: ${{secrets.CI_TOKEN}}
run: |
cd docs
git clone https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.wiki.git wiki
find ./ -name '*.md' -exec cp -prv '{}' 'wiki' ';'
cp docs/* wiki
cd wiki
git config --local user.email "info@victoriametrics.com"
git config --local user.name "Vika"
git add "*.md"
git add .
git commit -m "update wiki pages"
remote_repo="https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.wiki.git"
git push "${remote_repo}"
cd ..
rm -rf wiki
rm -rf wiki

View File

@@ -13,6 +13,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date
all: \
victoria-metrics-prod \
vmagent-prod \
vmalert-prot \
vmbackup-prod \
vmrestore-prod
@@ -25,17 +26,20 @@ clean:
publish: \
publish-victoria-metrics \
publish-vmagent \
publish-vmalert \
publish-vmbackup \
publish-vmrestore
package: \
package-victoria-metrics \
package-vmagent \
package-vmalert \
package-vmbackup \
package-vmrestore
vmutils: \
vmagent \
vmalert \
vmbackup \
vmrestore
@@ -49,9 +53,10 @@ release-victoria-metrics: victoria-metrics-prod
release-vmutils: \
vmagent-prod \
vmalert-prod \
vmbackup-prod \
vmrestore-prod
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmbackup-prod vmrestore-prod && \
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmbackup-prod vmrestore-prod && \
sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
pprof-cpu:
@@ -78,6 +83,7 @@ errcheck: install-errcheck
errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
@@ -130,7 +136,7 @@ install-qtc:
golangci-lint: install-golangci-lint
golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck
golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck --timeout 2m
install-golangci-lint:
which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint

118
README.md
View File

@@ -26,6 +26,9 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* [Synthesio](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#synthesio)
* [MHI Vestas Offshore Wind](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#mhi-vestas-offshore-wind)
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
* [Brandwatch](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#brandwatch)
* [Adsterra](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adsterra)
* [ARNES](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#arnes)
## Prominent features
@@ -114,11 +117,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* [Backfilling](#backfilling)
* [Profiling](#profiling)
* [Integrations](#integrations)
* [Roadmap](#roadmap)
* [Third-party contributions](#third-party-contributions)
* [Contacts](#contacts)
* [Community and contributions](#community-and-contributions)
* [Third-party contributions](#third-party-contributions)
* [Reporting bugs](#reporting-bugs)
* [Roadmap](#roadmap)
* [Victoria Metrics Logo](#victoria-metrics-logo)
* [Logo Usage Guidelines](#logo-usage-guidelines)
* [Font used](#font-used)
@@ -147,6 +150,7 @@ Each flag values can be set thru environment variables by following these rules:
* The `-envflag.enable` flag must be set
* Each `.` in flag names must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
* For repeating flags, an alternative syntax can be used by joining the different values into one using `,` as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
* It is possible setting prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`
### Prometheus setup
@@ -204,6 +208,10 @@ Read more about tuning remote write for Prometheus [here](https://prometheus.io/
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
since the previous versions may have issues with `remote_write`.
Take a look also at [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md),
which can be used as faster and less resource-hungry alternative to Prometheus in certain cases.
### Grafana setup
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
@@ -251,6 +259,9 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
In the future other `*_sd_config` types will be supported.
@@ -268,7 +279,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag
and stream plain Influx line protocol data to the configured TCP and/or UDP addresses.
VictoriaMetrics maps Influx data using the following rules:
@@ -439,10 +451,10 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
* `<column_pos>` is the position of the CSV column (field). Column numbering starts from 1. The order of parsing rules may be arbitrary.
* `<type>` describes the column type. Supported types are:
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value. The metric name is read from the `<context>`.
CSV line must have at least a single metric field.
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value, which must be integer or floating-point number.
The metric name is read from the `<context>`. CSV line must have at least a single metric field. Multiple metric fields per CSV line is OK.
* `label` - the corresponding CSV column at `<column_pos>` contains label value. The label name is read from the `<context>`.
CSV line may have arbitrary number of label fields. All these fields are attached to all the configured metrics.
CSV line may have arbitrary number of label fields. All these labels are attached to all the configured metrics.
* `time` - the corresponding CSV column at `<column_pos>` contains metric time. CSV line may contain either one or zero columns with time.
If CSV line has no time, then the current time is used. The time is applied to all the configured metrics.
The format of the time is configured via `<context>`. Supported time formats are:
@@ -452,7 +464,7 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
* `rfc3339` - timestamp in [RFC3339](https://tools.ietf.org/html/rfc3339) format, i.e. `2006-01-02T15:04:05Z`.
* `custom:<layout>` - custom layout for the timestamp. The `<layout>` may contain arbitrary time layout according to [time.Parse rules in Go](https://golang.org/pkg/time/#Parse).
Each request to `/api/v1/import/csv` can contain arbitrary number of CSV lines.
Each request to `/api/v1/import/csv` may contain arbitrary number of CSV lines.
Example for importing CSV data via `/api/v1/import/csv`:
@@ -475,6 +487,8 @@ The following response should be returned:
{"metric":{"__name__":"ask","market":"NYSE","ticker":"GOOG"},"values":[1.23],"timestamps":[1583865146495]}
```
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
### Prometheus querying API usage
@@ -485,6 +499,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h
* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
* [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats)
These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
@@ -505,11 +520,11 @@ Additionally VictoriaMetrics provides the following handlers:
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) instead of building VictoriaMetrics
from sources. Building from sources is reasonable when developing additional features specific
to your needs.
to your needs or when testing bugfixes.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics` from the root folder of the repository.
It builds `victoria-metrics` binary and puts it into the `bin` folder.
@@ -525,7 +540,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
#### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics-arm` or `make victoria-metrics-arm64` from the root folder of the repository.
It builds `victoria-metrics-arm` or `victoria-metrics-arm64` binary respectively and puts it into the `bin` folder.
@@ -541,7 +556,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
This is an experimental mode, which may result in a lower compression ratio and slower decompression performance.
Use it with caution!
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics-pure` from the root folder of the repository.
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
@@ -551,6 +566,13 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
```
### Start with docker-compose
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
@@ -595,11 +617,13 @@ Steps for restoring from a snapshot:
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
the deleted time series isn't freed instantly - it is freed during subsequent [background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics.
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
The delete API is intended mainly for the following cases:
* One-off deleting of accidentally written invalid (or undesired) time series.
@@ -607,10 +631,11 @@ The delete API is intended mainly for the following cases:
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
* Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
This can be done with relabeling in [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur.
* Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur when deleting too old data.
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
@@ -676,6 +701,8 @@ curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export
curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl.gz
```
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
and importing them concurrently. Note that the original file must be split on newlines.
@@ -830,6 +857,7 @@ Consider setting the following command-line flags:
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
* `-search.resetCacheAuthKey` for protecting `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
@@ -868,10 +896,9 @@ The most interesting metrics are:
* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
in the database.
* Sum of all the `vm_cache_size_bytes` metrics - the total size of all the caches in the database.
* `vm_allowed_memory_bytes` - the maximum allowed size for caches in the database. It is calculated as `system_memory * <-memory.allowedPercent> / 100`,
where `system_memory` is the amount of system memory and `-memory.allowedPercent` is the corresponding flag value.
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
* `sum(vm_data_size_bytes)` - the total data size on disk.
### Troubleshooting
@@ -887,7 +914,9 @@ The most interesting metrics are:
* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
It may slow down when there is no enough free space left. So make sure `-storageDataPath` directory
has at least 20% of free space comparing to disk size.
has at least 20% of free space comparing to disk size. The remaining amount of free space
can be [monitored](#monitoring) via `vm_free_disk_space_bytes` metric. The total size of data
stored on the disk can be monitored via sum of `vm_data_size_bytes` metrics.
* If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
@@ -898,9 +927,22 @@ The most interesting metrics are:
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag
to value close to the maximum interval between samples.
* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag.
It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
* Metrics and labels leading to high cardinality or high churn rate can be determined at `/api/v1/status/tsdb` page.
See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
while `topN` equals to 10.
### Backfilling
VictoriaMetrics accepts historical data in arbitrary order of time.
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
@@ -938,18 +980,16 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
See [these docs](https://github.com/netdata/netdata#integrations).
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
See [this example](/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing single-node VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
* [Ansible role for installing cluster VictoriaMetrics](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
## Roadmap
## Third-party contributions
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Contacts
@@ -982,17 +1022,21 @@ We are open to third-party pull requests provided they follow [KISS design princ
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
## Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Reporting bugs
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
## Roadmap
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
## Victoria Metrics Logo
[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).

View File

@@ -1,8 +1,8 @@
ARG certs_image
FROM $certs_image AS certs
FROM scratch
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ARG base_image
FROM $base_image
EXPOSE 8428
ENTRYPOINT ["/victoria-metrics-prod"]
ARG src_binary
COPY $src_binary ./victoria-metrics-prod
EXPOSE 8428
ENTRYPOINT ["/victoria-metrics-prod"]

View File

@@ -14,7 +14,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
var selfScrapeInterval = flag.Duration("selfScrapeInterval", 0, "Interval for self-scraping own metrics at /metrics page")
var (
selfScrapeInterval = flag.Duration("selfScrapeInterval", 0, "Interval for self-scraping own metrics at /metrics page")
selfScrapeInstance = flag.String("selfScrapeInstance", "self", "Value for 'instance' label, which is added to self-scraped metrics")
selfScrapeJob = flag.String("selfScrapeJob", "victoria-metrics", "Value for 'job' label, which is added to self-scraped metrics")
)
var selfScraperStopCh chan struct{}
var selfScraperWG sync.WaitGroup
@@ -65,8 +69,8 @@ func selfScraper(scrapeInterval time.Duration) {
r := &rows.Rows[i]
labels = labels[:0]
labels = addLabel(labels, "", r.Metric)
labels = addLabel(labels, "job", "victoria-metrics")
labels = addLabel(labels, "instance", "self")
labels = addLabel(labels, "job", *selfScrapeJob)
labels = addLabel(labels, "instance", *selfScrapeInstance)
for j := range r.Tags {
t := &r.Tags[j]
labels = addLabel(labels, t.Key, t.Value)

View File

@@ -0,0 +1,16 @@
{
"name": "empty-label-match",
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395",
"data": [
"empty_label_match 1 {TIME_S-1m}",
"empty_label_match;foo=bar 2 {TIME_S-1m}",
"empty_label_match;foo=baz 3 {TIME_S-1m}"],
"query": ["/api/v1/query_range?query=empty_label_match{foo=~'bar|'}&start={TIME_S}&end={TIME_S}&step=60"],
"result_query_range": {
"status":"success",
"data":{"resultType":"matrix",
"result":[
{"metric":{"__name__":"empty_label_match"},"values":[["{TIME_S}","1"]]},
{"metric":{"__name__":"empty_label_match","foo":"bar"},"values":[["{TIME_S}","2"]]}
]}}
}

View File

@@ -1,7 +1,8 @@
## vmagent
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
or any other Prometheus-compatible storage system that supports `remote_write` protocol.
<img alt="vmagent" src="vmagent.png">
@@ -105,6 +106,14 @@ data among long-term remote storage, short-term remote storage and real-time ana
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
#### Prometheus remote_write proxy
`vmagent` may be used as a proxy for Prometheus data sent via Prometheus `remote_write` protocol. It can accept data via `remote_write` API
at `/api/v1/write` endpoint, apply relabeling and filtering and then proxy it to another `remote_write` systems.
The `vmagent` can be configured to encrypt the incoming `remote_write` requests with `-tls*` command-line flags.
Additionally, Basic Auth can be enabled for the incoming `remote_write` requests with `-httpAuth.*` command-line flags.
### How to collect metrics in Prometheus format
@@ -122,12 +131,21 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discover.
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
* `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
* `ec2_sd_configs` - for scraping targets in Amazone EC2.
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
`vmagent` doesn't support `role_arn` config param yet.
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
`vmagent` provides the following additional functionality `gce_sd_config`:
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
* if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
* `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
The following service discovery mechanisms will be added to `vmagent` soon:
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
@@ -173,7 +191,7 @@ Read more about relabeling in the following articles:
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. This page also exports information on improperly configured scrape configs.
### Troubleshooting
@@ -181,11 +199,14 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
since `vmagent` establishes at least a single TCP connection per each target.
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
The directory can grow big when remote storage is unvailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
@@ -196,7 +217,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmagent` from the root folder of the repository.
It builds `vmagent` binary and puts it into the `bin` folder.
@@ -211,3 +232,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmagent
```

View File

@@ -1,8 +1,8 @@
ARG certs_image
FROM $certs_image AS certs
FROM scratch
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ARG base_image
FROM $base_image
EXPOSE 8429
ENTRYPOINT ["/vmagent-prod"]
ARG src_binary
COPY $src_binary ./vmagent-prod
EXPOSE 8429
ENTRYPOINT ["/vmagent-prod"]

View File

@@ -214,6 +214,7 @@ func (c *client) sendBlock(block []byte) {
req.Header.SetMethod("POST")
req.Header.Add("Content-Type", "application/x-protobuf")
req.Header.Add("Content-Encoding", "snappy")
req.Header.Add("X-Prometheus-Remote-Write-Version", "0.1.0")
if c.authHeader != "" {
req.Header.Set("Authorization", c.authHeader)
}
@@ -232,8 +233,7 @@ again:
}
startTime := time.Now()
// There is no need in calling DoTimeout, since the timeout is set in c.hc.ReadTimeout.
err := c.hc.Do(req, resp)
err := doRequestWithPossibleRetry(c.hc, req, resp)
c.requestDuration.UpdateDuration(startTime)
if err != nil {
c.errorsCount.Inc()
@@ -266,3 +266,16 @@ again:
fasthttp.ReleaseResponse(resp)
fasthttp.ReleaseRequest(req)
}
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
err := hc.Do(req, resp)
if err == nil {
return nil
}
if err != fasthttp.ErrConnectionClosed {
return err
}
// Retry request if the server closed the keep-alive connection during the first attempt.
return hc.Do(req, resp)
}

78
app/vmalert/Makefile Normal file
View File

@@ -0,0 +1,78 @@
# All these commands must run from repository root.
vmalert:
APP_NAME=vmalert $(MAKE) app-local
vmalert-race:
APP_NAME=vmalert RACE=-race $(MAKE) app-local
vmalert-prod:
APP_NAME=vmalert $(MAKE) app-via-docker
vmalert-pure-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-pure
vmalert-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-amd64
vmalert-arm-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-arm
vmalert-arm64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-arm64
vmalert-ppc64le-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-ppc64le
vmalert-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-386
package-vmalert:
APP_NAME=vmalert $(MAKE) package-via-docker
package-vmalert-pure:
APP_NAME=vmalert $(MAKE) package-via-docker-pure
package-vmalert-amd64:
APP_NAME=vmalert $(MAKE) package-via-docker-amd64
package-vmalert-arm:
APP_NAME=vmalert $(MAKE) package-via-docker-arm
package-vmalert-arm64:
APP_NAME=vmalert $(MAKE) package-via-docker-arm64
package-vmalert-ppc64le:
APP_NAME=vmalert $(MAKE) package-via-docker-ppc64le
package-vmalert-386:
APP_NAME=vmalert $(MAKE) package-via-docker-386
publish-vmalert:
APP_NAME=vmalert $(MAKE) publish-via-docker
test-vmalert:
go test -race -cover ./app/vmalert
run-vmalert: vmalert
./bin/vmalert -rule=app/vmalert/testdata/rules0-good.rules \
-datasource.url=http://localhost:8428 -notifier.url=http://localhost:9093 \
-evaluationInterval=3s
vmalert-amd64:
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-amd64 ./app/vmalert
vmalert-arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm ./app/vmalert
vmalert-arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-arm64 ./app/vmalert
vmalert-ppc64le:
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-ppc64le ./app/vmalert
vmalert-386:
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmalert-386 ./app/vmalert
vmalert-pure:
APP_NAME=vmalert $(MAKE) app-local-pure

View File

@@ -1,41 +1,95 @@
## VM Alert
#### Abstract
The application which accepts the alert rules, executes them on given source, sends(fires) an alert to(in) alert management system
`vmalert` executes a list of given MetricsQL expressions (rules) and
sends alerts to [Alert Manager](https://github.com/prometheus/alertmanager).
### Components
NOTE: `vmalert` is in early alpha and wasn't tested in production systems yet.
#### Alert Config Reader
It accepts yaml config as input parameter in Prometheus format, parses it into Go struct.
### Features:
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
* VictoriaMetrics [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
expressions validation;
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
support;
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
* Lightweight without extra dependencies.
#### Source Caller
Create own watchdog for every alert group (goroutines), which executes alert query on given source and issues an alert if source returns non-empty result.
Source can be any service which supports PromQL (MetricsQL).
### TODO:
* Persist alerts state as timeseries in TSDB. Currently, alerts state is stored
in process memory only and will be lost on restart;
* Configuration hot reload.
#### Alert Management System Provider
Send positive alert to alert management system, provides interface for every concrete implementation.
Should be ingratiated with Prometheus alertmanager.
### QuickStart
open questions:
- do we really need alert group or can just run every alert in own goroutine?
To build `vmalert` from sources:
```
git clone https://github.com/VictoriaMetrics/VictoriaMetrics
cd VictoriaMetrics
make vmalert
```
The build binary will be placed to `VictoriaMetrics/bin` folder.
#### Web Server
Expose metrics
To start using `vmalert` you will need the following things:
* list of alert rules - PromQL/MetricsQL expressions to execute;
* datasource address - reachable VictoriaMetrics instance for rules execution;
* notifier address - reachable Alertmanager instance for processing,
aggregating alerts and sending notifications.
open questions:
- should the tool provide API or UI for managing alerting rules? Where to store config updated via the API or UI?
- should the tool provide “alerting rules validation mode” for validating and debugging alerting rules? This mode is useful when creating and debugging alerting rules.
Then configure `vmalert` accordingly:
```
./bin/vmalert -rule=alert.rules \
-datasource.url=http://localhost:8428 \
-notifier.url=http://localhost:9093
```
#### Requirements:
- Stateless
- Avoid external dependencies if possible
- Reuse existing code from VictoriaMetrics repo
- Makefile rules for common tasks see Makefiles for other apps in the app/ dir
- Every package should be covered by tests
- Dockerfile
- Graceful shutdown
- Helm template
- Application uses command line flags for configuration
Example for `.rules` file may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/testdata/rules0-good.rules)
`vmalert` runs evaluation for every group in a separate goroutine.
Rules in group evaluated one-by-one sequentially.
<img alt="VM Alert" src="vmalert.png">
`vmalert` also runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
* `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status" ` - get alert status by ID.
Used as alert source in AlertManager.
* `http://<vmalert-addr>/metrics` - application metrics.
### Configuration
The shortlist of configuration flags is the following:
```
Usage of vmalert:
-datasource.url string
Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428
-datasource.basicAuth.password string
Optional basic auth password to use for -datasource.url
-datasource.basicAuth.username string
Optional basic auth username to use for -datasource.url
-evaluationInterval duration
How often to evaluate the rules. Default 1m (default 1m0s)
-external.url string
External URL is used as alert's source for sent alerts to the notifier
-httpListenAddr string
Address to listen for http connections (default ":8880")
-notifier.url string
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
-remotewrite.url string
Optional URL to remote-write compatible storage where to write timeseriesbased on active alerts. E.g. http://127.0.0.1:8428
-rule value
Path to the file with alert rules.
Supports patterns. Flag can be specified multiple times.
Examples:
-rule /path/to/file. Path to a single file with alerting rules
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
absolute path to all .yaml files in root.
-rule.validateTemplates
Indicates to validate annotation and label templates (default true)
```
Pass `-help` to `vmalert` in order to see the full list of supported
command-line flags with their descriptions.
### Contributing
`vmalert` is mostly designed and built by VictoriaMetrics community.
Feel free to share your experience and ideas for improving this
software. Please keep simplicity as the main priority.

70
app/vmalert/config.go Normal file
View File

@@ -0,0 +1,70 @@
package main
import (
"fmt"
"gopkg.in/yaml.v2"
"io/ioutil"
"path/filepath"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// Parse parses rule configs from given file patterns
func Parse(pathPatterns []string, validateAnnotations bool) ([]Group, error) {
var fp []string
for _, pattern := range pathPatterns {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, fmt.Errorf("error reading file patther %s:%v", pattern, err)
}
fp = append(fp, matches...)
}
var groups []Group
for _, file := range fp {
groupsNames := map[string]struct{}{}
gr, err := parseFile(file)
if err != nil {
return nil, fmt.Errorf("file %s: %w", file, err)
}
for _, group := range gr {
if _, ok := groupsNames[group.Name]; ok {
return nil, fmt.Errorf("one file can not contain groups with the same name %s, filepath:%s", file, group.Name)
}
groupsNames[group.Name] = struct{}{}
for _, rule := range group.Rules {
if err = rule.Validate(); err != nil {
return nil, fmt.Errorf("invalid rule filepath:%s, group %s:%w", file, group.Name, err)
}
// TODO: this init looks weird here
rule.alerts = make(map[uint64]*notifier.Alert)
if validateAnnotations {
if err = notifier.ValidateTemplates(rule.Annotations); err != nil {
return nil, fmt.Errorf("invalid annotations filepath:%s, group %s:%w", file, group.Name, err)
}
if err = notifier.ValidateTemplates(rule.Labels); err != nil {
return nil, fmt.Errorf("invalid labels filepath:%s, group %s:%w", file, group.Name, err)
}
}
rule.group = &group
}
}
groups = append(groups, gr...)
}
if len(groups) < 1 {
return nil, fmt.Errorf("no groups found in %s", strings.Join(pathPatterns, ";"))
}
return groups, nil
}
func parseFile(path string) ([]Group, error) {
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("error reading alert rule file: %w", err)
}
g := struct {
Groups []Group `yaml:"groups"`
}{}
err = yaml.Unmarshal(data, &g)
return g.Groups, err
}

View File

@@ -1,38 +0,0 @@
package config
import "time"
// Rule is basic alert entity
type Rule struct {
Name string
Expr string
For time.Duration
Labels map[string]string
Annotations map[string]string
}
// Group grouping array of alert
type Group struct {
Name string
Rules []Rule
}
// Parse parses config from given file
func Parse(filepath string) ([]Group, error) {
return []Group{{
Name: "foobar",
Rules: []Rule{{
Name: "vmrowsalert",
Expr: "vm_rows",
For: 1 * time.Second,
Labels: map[string]string{
"alert_label": "value1",
"alert_label2": "value2",
},
Annotations: map[string]string{
"summary": "{{ $value }}",
"description": "LABELS: {{ $labels }}",
},
}},
}}, nil
}

View File

@@ -0,0 +1,39 @@
package main
import (
"net/url"
"os"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
func TestMain(m *testing.M) {
u, _ := url.Parse("https://victoriametrics.com/path")
notifier.InitTemplateFunc(u)
os.Exit(m.Run())
}
func TestParseGood(t *testing.T) {
if _, err := Parse([]string{"testdata/*good.rules", "testdata/dir/*good.*"}, true); err != nil {
t.Errorf("error parsing files %s", err)
}
}
func TestParseBad(t *testing.T) {
if _, err := Parse([]string{"testdata/rules0-bad.rules"}, true); err == nil {
t.Errorf("expected syntaxt error")
}
if _, err := Parse([]string{"testdata/dir/rules0-bad.rules"}, true); err == nil {
t.Errorf("expected template annotation error")
}
if _, err := Parse([]string{"testdata/dir/rules1-bad.rules"}, true); err == nil {
t.Errorf("expected same group error")
}
if _, err := Parse([]string{"testdata/dir/rules2-bad.rules"}, true); err == nil {
t.Errorf("expected template label error")
}
if _, err := Parse([]string{"testdata/*.yaml"}, true); err == nil {
t.Errorf("expected empty group")
}
}

View File

@@ -1,5 +1,14 @@
package datasource
import "context"
// Querier interface wraps Query method which
// executes given query and returns list of Metrics
// as result
type Querier interface {
Query(ctx context.Context, query string) ([]Metric, error)
}
// Metric is the basic entity which should be return by datasource
// It represents single data point with full list of labels
type Metric struct {

View File

@@ -0,0 +1,8 @@
ARG base_image
FROM $base_image
EXPOSE 8880
ENTRYPOINT ["/vmalert-prod"]
ARG src_binary
COPY $src_binary ./vmalert-prod

View File

@@ -4,131 +4,210 @@ import (
"context"
"flag"
"fmt"
"net"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/provider"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/metrics"
)
var (
configPath = flag.String("config", "config.yaml", "Path to alert configuration file")
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username to use for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password to use for -datasource.url")
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
Supports patterns. Flag can be specified multiple times.
Examples:
-rule /path/to/file. Path to a single file with alerting rules
-rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder,
absolute path to all .yaml files in root.`)
validateTemplates = flag.Bool("rule.validateTemplates", true, "Indicates to validate annotation and label templates")
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username to use for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password to use for -datasource.url")
remoteWriteURL = flag.String("remotewrite.url", "", "Optional URL to remote-write compatible storage where to write timeseries"+
"based on active alerts. E.g. http://127.0.0.1:8428")
evaluationInterval = flag.Duration("evaluationInterval", 1*time.Minute, "How often to evaluate the rules. Default 1m")
providerURL = flag.String("provider.url", "", "Prometheus alertmanager url. Required parameter. e.g. http://127.0.0.1:9093")
notifierURL = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
)
// TODO: hot configuration reload
// TODO: alerts state persistence
func main() {
envflag.Parse()
buildinfo.Init()
logger.Init()
checkFlags()
ctx, cancel := context.WithCancel(context.Background())
logger.Infof("reading alert rules configuration file from %s", *configPath)
alertGroups, err := config.Parse(*configPath)
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
if err != nil {
logger.Fatalf("Cannot parse configuration file %s", err)
logger.Fatalf("can not get external url:%s ", err)
}
addr := getWebServerAddr(*httpListenAddr, false)
notifier.InitTemplateFunc(eu)
logger.Infof("reading alert rules configuration file from %s", strings.Join(*rulePath, ";"))
groups, err := Parse(*rulePath, *validateTemplates)
if err != nil {
logger.Fatalf("cannot parse configuration file: %s", err)
}
w := &watchdog{
storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{}),
alertProvider: provider.NewAlertManager(*providerURL, func(group, name string) string {
return addr + fmt.Sprintf("/%s/%s/status", group, name)
alertProvider: notifier.NewAlertManager(*notifierURL, func(group, name string) string {
return fmt.Sprintf("%s/api/v1/%s/%s/status", eu, group, name)
}, &http.Client{}),
}
for id := range alertGroups {
go func(group config.Group) {
w.run(ctx, group, *evaluationInterval)
}(alertGroups[id])
}
go func() {
httpserver.Serve(*httpListenAddr, func(w http.ResponseWriter, r *http.Request) bool {
panic("not implemented")
if *remoteWriteURL != "" {
c, err := remotewrite.NewClient(ctx, remotewrite.Config{
Addr: *remoteWriteURL,
FlushInterval: *evaluationInterval,
})
}()
if err != nil {
logger.Fatalf("failed to init remotewrite client: %s", err)
}
w.rw = c
}
wg := sync.WaitGroup{}
for i := range groups {
wg.Add(1)
go func(group Group) {
w.run(ctx, group, *evaluationInterval)
wg.Done()
}(groups[i])
}
go httpserver.Serve(*httpListenAddr, (&requestHandler{groups: groups}).handler)
sig := procutil.WaitForSigterm()
logger.Infof("service received signal %s", sig)
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
}
cancel()
w.stop()
if w.rw != nil {
err := w.rw.Close()
if err != nil {
logger.Fatalf("cannot stop the remotewrite: %s", err)
}
}
wg.Wait()
}
type watchdog struct {
storage *datasource.VMStorage
alertProvider provider.AlertProvider
alertProvider notifier.Notifier
rw *remotewrite.Client
}
func (w *watchdog) run(ctx context.Context, a config.Group, evaluationInterval time.Duration) {
var (
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
execTotal = metrics.NewCounter(`vmalert_execution_total`)
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
)
func (w *watchdog) run(ctx context.Context, group Group, evaluationInterval time.Duration) {
logger.Infof("watchdog for %s has been started", group.Name)
t := time.NewTicker(evaluationInterval)
var metrics []datasource.Metric
var err error
var alerts []provider.Alert
defer t.Stop()
for {
select {
case <-t.C:
for _, r := range a.Rules {
if metrics, err = w.storage.Query(ctx, r.Expr); err != nil {
logger.Errorf("error reading metrics %s", err)
continue
}
// todo check for and calculate alert states
if len(metrics) < 1 {
continue
}
alerts = provider.AlertsFromMetrics(metrics, a.Name, r)
// todo save to storage
if err := w.alertProvider.Send(alerts); err != nil {
logger.Errorf("error sending alerts %s", err)
continue
}
// todo is alert still active/pending?
}
iterationTotal.Inc()
iterationStart := time.Now()
for _, rule := range group.Rules {
execTotal.Inc()
execStart := time.Now()
err := rule.Exec(ctx, w.storage)
execDuration.UpdateDuration(execStart)
if err != nil {
execErrors.Inc()
logger.Errorf("failed to execute rule %q.%q: %s", group.Name, rule.Name, err)
continue
}
var alertsToSend []notifier.Alert
for _, a := range rule.alerts {
if a.State != notifier.StatePending {
alertsToSend = append(alertsToSend, *a)
}
if a.State == notifier.StateInactive || w.rw == nil {
continue
}
tss := rule.AlertToTimeSeries(a, execStart)
for _, ts := range tss {
remoteWriteSent.Inc()
if err := w.rw.Push(ts); err != nil {
remoteWriteErrors.Inc()
logger.Errorf("failed to push timeseries to remotewrite: %s", err)
}
}
}
alertsSent.Add(len(alertsToSend))
if err := w.alertProvider.Send(alertsToSend); err != nil {
alertsSendErrors.Inc()
logger.Errorf("failed to send alert for rule %q.%q: %s", group.Name, rule.Name, err)
}
}
iterationDuration.UpdateDuration(iterationStart)
case <-ctx.Done():
logger.Infof("%s receive stop signal", a.Name)
logger.Infof("%s received stop signal", group.Name)
return
}
}
}
func getWebServerAddr(httpListenAddr string, isSecure bool) string {
if strings.Index(httpListenAddr, ":") != 0 {
if isSecure {
return "https://" + httpListenAddr
}
return "http://" + httpListenAddr
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
if externalURL != "" {
return url.Parse(externalURL)
}
addrs, err := net.InterfaceAddrs()
hname, err := os.Hostname()
if err != nil {
panic("error getting the interface addresses ")
return nil, err
}
for _, a := range addrs {
if ipnet, ok := a.(*net.IPNet); ok && !ipnet.IP.IsLoopback() {
if ipnet.IP.To4() != nil {
return "http://" + ipnet.IP.String() + httpListenAddr
}
}
port := ""
if ipport := strings.Split(httpListenAddr, ":"); len(ipport) > 1 {
port = ":" + ipport[1]
}
// no loopback ip return internal address
return "http://127.0.0.1" + httpListenAddr
schema := "http://"
if isSecure {
schema = "https://"
}
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
}
func (w *watchdog) stop() {
panic("not implemented")
func checkFlags() {
if *notifierURL == "" {
flag.PrintDefaults()
logger.Fatalf("notifier.url is empty")
}
if *datasourceURL == "" {
flag.PrintDefaults()
logger.Fatalf("datasource.url is empty")
}
}

View File

@@ -0,0 +1,105 @@
package notifier
import (
"bytes"
"fmt"
"io"
"strings"
"text/template"
"time"
)
// Alert the triggered alert
// TODO: Looks like alert name isn't unique
type Alert struct {
Group string
Name string
Labels map[string]string
Annotations map[string]string
State AlertState
Start time.Time
End time.Time
Value float64
ID uint64
}
// AlertState type indicates the Alert state
type AlertState int
const (
// StateInactive is the state of an alert that is neither firing nor pending.
StateInactive AlertState = iota
// StatePending is the state of an alert that has been active for less than
// the configured threshold duration.
StatePending
// StateFiring is the state of an alert that has been active for longer than
// the configured threshold duration.
StateFiring
)
// String stringer for AlertState
func (as AlertState) String() string {
switch as {
case StateFiring:
return "firing"
case StatePending:
return "pending"
}
return "inactive"
}
type alertTplData struct {
Labels map[string]string
Value float64
}
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}`
// ExecTemplate executes the Alert template for give
// map of annotations.
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
tplData := alertTplData{Value: a.Value, Labels: a.Labels}
return templateAnnotations(annotations, tplHeader, tplData)
}
// ValidateTemplates validate annotations for possible template error, uses empty data for template population
func ValidateTemplates(annotations map[string]string) error {
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
Labels: map[string]string{},
Value: 0,
})
return err
}
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
var builder strings.Builder
var buf bytes.Buffer
eg := errGroup{}
r := make(map[string]string, len(annotations))
for key, text := range annotations {
r[key] = text
buf.Reset()
builder.Reset()
builder.Grow(len(header) + len(text))
builder.WriteString(header)
builder.WriteString(text)
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
eg.errs = append(eg.errs, fmt.Sprintf("key %s, template %s:%s", key, text, err))
continue
}
r[key] = buf.String()
}
return r, eg.err()
}
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
if err != nil {
return fmt.Errorf("error parsing annotation:%w", err)
}
if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating annotation template:%w", err)
}
return nil
}

View File

@@ -0,0 +1,65 @@
package notifier
import (
"fmt"
"testing"
)
func TestAlert_ExecTemplate(t *testing.T) {
testCases := []struct {
alert *Alert
annotations map[string]string
expTpl map[string]string
}{
{
alert: &Alert{},
annotations: map[string]string{},
expTpl: map[string]string{},
},
{
alert: &Alert{
Value: 1e4,
Labels: map[string]string{
"instance": "localhost",
},
},
annotations: map[string]string{},
expTpl: map[string]string{},
},
{
alert: &Alert{
Value: 1e4,
Labels: map[string]string{
"job": "staging",
"instance": "localhost",
},
},
annotations: map[string]string{
"summary": "Too high connection number for {{$labels.instance}} for job {{$labels.job}}",
"description": "It is {{ $value }} connections for {{$labels.instance}}",
},
expTpl: map[string]string{
"summary": "Too high connection number for localhost for job staging",
"description": "It is 10000 connections for localhost",
},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
tpl, err := tc.alert.ExecTemplate(tc.annotations)
if err != nil {
t.Fatal(err)
}
if len(tpl) != len(tc.expTpl) {
t.Fatalf("expected %d elements; got %d", len(tc.expTpl), len(tpl))
}
for k := range tc.expTpl {
got, exp := tpl[k], tc.expTpl[k]
if got != exp {
t.Fatalf("expected %q=%q; got %q=%q", k, exp, k, got)
}
}
})
}
}

View File

@@ -1,58 +1,51 @@
package provider
package notifier
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net/http"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
const alertsPath = "/api/v2/alerts"
var pool = sync.Pool{New: func() interface{} {
return &bytes.Buffer{}
}}
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
alertURL string
argFunc AlertURLGenerator
client *http.Client
}
// AlertURLGenerator returns URL to single alert by given name
type AlertURLGenerator func(group, name string) string
// NewAlertManager is a constructor for AlertManager
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
return &AlertManager{
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertsPath,
argFunc: fn,
client: c,
}
}
// Send an alert or resolve message
func (am *AlertManager) Send(alerts []Alert) error {
b := pool.Get().(*bytes.Buffer)
b.Reset()
defer pool.Put(b)
b := &bytes.Buffer{}
writeamRequest(b, alerts, am.argFunc)
resp, err := am.client.Post(am.alertURL, "application/json", b)
if err != nil {
return err
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
b.Reset()
if _, err := io.Copy(b, resp.Body); err != nil {
logger.Errorf("unable to copy error response body to buffer %s", err)
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response from %q: %s", am.alertURL, err)
}
return fmt.Errorf("invalid response from alertmanager %s", b)
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.alertURL, string(body))
}
return nil
}
// AlertURLGenerator returns URL to single alert by given name
type AlertURLGenerator func(group, id string) string
const alertManagerPath = "/api/v2/alerts"
// NewAlertManager is a constructor for AlertManager
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
return &AlertManager{
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertManagerPath,
argFunc: fn,
client: c,
}
}

View File

@@ -1,4 +1,5 @@
{% import (
"strconv"
"time"
) %}
{% stripspace %}
@@ -8,14 +9,14 @@
{% for i, alert := range alerts %}
{
"startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
"generatorURL": {%q= generatorURL(alert.Group, alert.Name) %},
"generatorURL": {%q= generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)) %},
{% if !alert.End.IsZero() %}
"endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
{% endif %}
"labels": {
"alertname":{%q= alert.Name %}
{% for _,v := range alert.Labels %}
,{%q= v.Name %}:{%q= v.Value %}
{% for k,v := range alert.Labels %}
,{%q= k %}:{%q= v %}
{% endfor %}
},
"annotations": {

View File

@@ -0,0 +1,131 @@
// Code generated by qtc from "alertmanager_request.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/notifier/alertmanager_request.qtpl:1
package notifier
//line app/vmalert/notifier/alertmanager_request.qtpl:1
import (
"strconv"
"time"
)
//line app/vmalert/notifier/alertmanager_request.qtpl:7
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/notifier/alertmanager_request.qtpl:7
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/notifier/alertmanager_request.qtpl:7
func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
//line app/vmalert/notifier/alertmanager_request.qtpl:7
qw422016.N().S(`[`)
//line app/vmalert/notifier/alertmanager_request.qtpl:9
for i, alert := range alerts {
//line app/vmalert/notifier/alertmanager_request.qtpl:9
qw422016.N().S(`{"startsAt":`)
//line app/vmalert/notifier/alertmanager_request.qtpl:11
qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
//line app/vmalert/notifier/alertmanager_request.qtpl:11
qw422016.N().S(`,"generatorURL":`)
//line app/vmalert/notifier/alertmanager_request.qtpl:12
qw422016.N().Q(generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)))
//line app/vmalert/notifier/alertmanager_request.qtpl:12
qw422016.N().S(`,`)
//line app/vmalert/notifier/alertmanager_request.qtpl:13
if !alert.End.IsZero() {
//line app/vmalert/notifier/alertmanager_request.qtpl:13
qw422016.N().S(`"endsAt":`)
//line app/vmalert/notifier/alertmanager_request.qtpl:14
qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
//line app/vmalert/notifier/alertmanager_request.qtpl:14
qw422016.N().S(`,`)
//line app/vmalert/notifier/alertmanager_request.qtpl:15
}
//line app/vmalert/notifier/alertmanager_request.qtpl:15
qw422016.N().S(`"labels": {"alertname":`)
//line app/vmalert/notifier/alertmanager_request.qtpl:17
qw422016.N().Q(alert.Name)
//line app/vmalert/notifier/alertmanager_request.qtpl:18
for k, v := range alert.Labels {
//line app/vmalert/notifier/alertmanager_request.qtpl:18
qw422016.N().S(`,`)
//line app/vmalert/notifier/alertmanager_request.qtpl:19
qw422016.N().Q(k)
//line app/vmalert/notifier/alertmanager_request.qtpl:19
qw422016.N().S(`:`)
//line app/vmalert/notifier/alertmanager_request.qtpl:19
qw422016.N().Q(v)
//line app/vmalert/notifier/alertmanager_request.qtpl:20
}
//line app/vmalert/notifier/alertmanager_request.qtpl:20
qw422016.N().S(`},"annotations": {`)
//line app/vmalert/notifier/alertmanager_request.qtpl:23
c := len(alert.Annotations)
//line app/vmalert/notifier/alertmanager_request.qtpl:24
for k, v := range alert.Annotations {
//line app/vmalert/notifier/alertmanager_request.qtpl:25
c = c - 1
//line app/vmalert/notifier/alertmanager_request.qtpl:26
qw422016.N().Q(k)
//line app/vmalert/notifier/alertmanager_request.qtpl:26
qw422016.N().S(`:`)
//line app/vmalert/notifier/alertmanager_request.qtpl:26
qw422016.N().Q(v)
//line app/vmalert/notifier/alertmanager_request.qtpl:26
if c > 0 {
//line app/vmalert/notifier/alertmanager_request.qtpl:26
qw422016.N().S(`,`)
//line app/vmalert/notifier/alertmanager_request.qtpl:26
}
//line app/vmalert/notifier/alertmanager_request.qtpl:27
}
//line app/vmalert/notifier/alertmanager_request.qtpl:27
qw422016.N().S(`}}`)
//line app/vmalert/notifier/alertmanager_request.qtpl:30
if i != len(alerts)-1 {
//line app/vmalert/notifier/alertmanager_request.qtpl:30
qw422016.N().S(`,`)
//line app/vmalert/notifier/alertmanager_request.qtpl:30
}
//line app/vmalert/notifier/alertmanager_request.qtpl:31
}
//line app/vmalert/notifier/alertmanager_request.qtpl:31
qw422016.N().S(`]`)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
}
//line app/vmalert/notifier/alertmanager_request.qtpl:33
func writeamRequest(qq422016 qtio422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
//line app/vmalert/notifier/alertmanager_request.qtpl:33
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
streamamRequest(qw422016, alerts, generatorURL)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
}
//line app/vmalert/notifier/alertmanager_request.qtpl:33
func amRequest(alerts []Alert, generatorURL func(string, string) string) string {
//line app/vmalert/notifier/alertmanager_request.qtpl:33
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/notifier/alertmanager_request.qtpl:33
writeamRequest(qb422016, alerts, generatorURL)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
qs422016 := string(qb422016.B)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/notifier/alertmanager_request.qtpl:33
return qs422016
//line app/vmalert/notifier/alertmanager_request.qtpl:33
}

View File

@@ -1,4 +1,4 @@
package provider
package notifier
import (
"encoding/json"
@@ -14,7 +14,7 @@ func TestAlertManager_Send(t *testing.T) {
t.Errorf("should not be called")
})
c := -1
mux.HandleFunc(alertsPath, func(w http.ResponseWriter, r *http.Request) {
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Errorf("expected POST method got %s", r.Method)
@@ -40,7 +40,7 @@ func TestAlertManager_Send(t *testing.T) {
if len(a) != 1 {
t.Errorf("expected 1 alert in array got %d", len(a))
}
if a[0].GeneratorURL != "group0alert0" {
if a[0].GeneratorURL != "group0" {
t.Errorf("exptected alert0 as generatorURL got %s", a[0].GeneratorURL)
}
if a[0].Labels["alertname"] != "alert0" {
@@ -66,7 +66,7 @@ func TestAlertManager_Send(t *testing.T) {
t.Error("expected wrong http code error got nil")
}
if err := am.Send([]Alert{{
Group: "group0",
Group: "group",
Name: "alert0",
Start: time.Now().UTC(),
End: time.Now().UTC(),

View File

@@ -0,0 +1,6 @@
package notifier
// Notifier is common interface for alert manager provider
type Notifier interface {
Send(alerts []Alert) error
}

View File

@@ -0,0 +1,171 @@
// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package notifier
import (
"fmt"
html_template "html/template"
"math"
"net/url"
"regexp"
"strings"
text_template "text/template"
"time"
)
var tmplFunc text_template.FuncMap
// InitTemplateFunc returns template helper functions
func InitTemplateFunc(externalURL *url.URL) {
tmplFunc = text_template.FuncMap{
"args": func(args ...interface{}) map[string]interface{} {
result := make(map[string]interface{})
for i, a := range args {
result[fmt.Sprintf("arg%d", i)] = a
}
return result
},
"reReplaceAll": func(pattern, repl, text string) string {
re := regexp.MustCompile(pattern)
return re.ReplaceAllString(text, repl)
},
"safeHtml": func(text string) html_template.HTML {
return html_template.HTML(text)
},
"match": regexp.MatchString,
"title": strings.Title,
"toUpper": strings.ToUpper,
"toLower": strings.ToLower,
"humanize": func(v float64) string {
if v == 0 || math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v)
}
if math.Abs(v) >= 1 {
prefix := ""
for _, p := range []string{"k", "M", "G", "T", "P", "E", "Z", "Y"} {
if math.Abs(v) < 1000 {
break
}
prefix = p
v /= 1000
}
return fmt.Sprintf("%.4g%s", v, prefix)
}
prefix := ""
for _, p := range []string{"m", "u", "n", "p", "f", "a", "z", "y"} {
if math.Abs(v) >= 1 {
break
}
prefix = p
v *= 1000
}
return fmt.Sprintf("%.4g%s", v, prefix)
},
"humanize1024": func(v float64) string {
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v)
}
prefix := ""
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
if math.Abs(v) < 1024 {
break
}
prefix = p
v /= 1024
}
return fmt.Sprintf("%.4g%s", v, prefix)
},
"humanizeDuration": func(v float64) string {
if math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v)
}
if v == 0 {
return fmt.Sprintf("%.4gs", v)
}
if math.Abs(v) >= 1 {
sign := ""
if v < 0 {
sign = "-"
v = -v
}
seconds := int64(v) % 60
minutes := (int64(v) / 60) % 60
hours := (int64(v) / 60 / 60) % 24
days := int64(v) / 60 / 60 / 24
// For days to minutes, we display seconds as an integer.
if days != 0 {
return fmt.Sprintf("%s%dd %dh %dm %ds", sign, days, hours, minutes, seconds)
}
if hours != 0 {
return fmt.Sprintf("%s%dh %dm %ds", sign, hours, minutes, seconds)
}
if minutes != 0 {
return fmt.Sprintf("%s%dm %ds", sign, minutes, seconds)
}
// For seconds, we display 4 significant digits.
return fmt.Sprintf("%s%.4gs", sign, v)
}
prefix := ""
for _, p := range []string{"m", "u", "n", "p", "f", "a", "z", "y"} {
if math.Abs(v) >= 1 {
break
}
prefix = p
v *= 1000
}
return fmt.Sprintf("%.4g%ss", v, prefix)
},
"humanizePercentage": func(v float64) string {
return fmt.Sprintf("%.4g%%", v*100)
},
"humanizeTimestamp": func(v float64) string {
if math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v)
}
t := TimeFromUnixNano(int64(v * 1e9)).Time().UTC()
return fmt.Sprint(t)
},
"pathPrefix": func() string {
return externalURL.Path
},
"externalURL": func() string {
return externalURL.String()
},
}
}
// Time is the number of milliseconds since the epoch
// (1970-01-01 00:00 UTC) excluding leap seconds.
type Time int64
// TimeFromUnixNano returns the Time equivalent to the Unix Time
// t provided in nanoseconds.
func TimeFromUnixNano(t int64) Time {
return Time(t / nanosPerTick)
}
// The number of nanoseconds per minimum tick.
const nanosPerTick = int64(minimumTick / time.Nanosecond)
// MinimumTick is the minimum supported time resolution. This has to be
// at least time.Second in order for the code below to work.
const minimumTick = time.Millisecond
// second is the Time duration equivalent to one second.
const second = int64(time.Second / minimumTick)
// Time returns the time.Time representation of t.
func (t Time) Time() time.Time {
return time.Unix(int64(t)/second, (int64(t)%second)*nanosPerTick)
}

View File

@@ -0,0 +1,21 @@
package notifier
import (
"fmt"
"strings"
)
type errGroup struct {
errs []string
}
func (eg *errGroup) err() error {
if eg == nil || len(eg.errs) == 0 {
return nil
}
return eg
}
func (eg *errGroup) Error() string {
return fmt.Sprintf("errors:%s", strings.Join(eg.errs, "\n"))
}

View File

@@ -1,130 +0,0 @@
// Code generated by qtc from "alert_manager_request.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/provider/alert_manager_request.qtpl:1
package provider
//line app/vmalert/provider/alert_manager_request.qtpl:1
import (
"time"
)
//line app/vmalert/provider/alert_manager_request.qtpl:6
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/provider/alert_manager_request.qtpl:6
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/provider/alert_manager_request.qtpl:6
func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
//line app/vmalert/provider/alert_manager_request.qtpl:6
qw422016.N().S(`[`)
//line app/vmalert/provider/alert_manager_request.qtpl:8
for i, alert := range alerts {
//line app/vmalert/provider/alert_manager_request.qtpl:8
qw422016.N().S(`{"startsAt":`)
//line app/vmalert/provider/alert_manager_request.qtpl:10
qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
//line app/vmalert/provider/alert_manager_request.qtpl:10
qw422016.N().S(`,"generatorURL":`)
//line app/vmalert/provider/alert_manager_request.qtpl:11
qw422016.N().Q(generatorURL(alert.Group, alert.Name))
//line app/vmalert/provider/alert_manager_request.qtpl:11
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:12
if !alert.End.IsZero() {
//line app/vmalert/provider/alert_manager_request.qtpl:12
qw422016.N().S(`"endsAt":`)
//line app/vmalert/provider/alert_manager_request.qtpl:13
qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
//line app/vmalert/provider/alert_manager_request.qtpl:13
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:14
}
//line app/vmalert/provider/alert_manager_request.qtpl:14
qw422016.N().S(`"labels": {"alertname":`)
//line app/vmalert/provider/alert_manager_request.qtpl:16
qw422016.N().Q(alert.Name)
//line app/vmalert/provider/alert_manager_request.qtpl:17
for _, v := range alert.Labels {
//line app/vmalert/provider/alert_manager_request.qtpl:17
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:18
qw422016.N().Q(v.Name)
//line app/vmalert/provider/alert_manager_request.qtpl:18
qw422016.N().S(`:`)
//line app/vmalert/provider/alert_manager_request.qtpl:18
qw422016.N().Q(v.Value)
//line app/vmalert/provider/alert_manager_request.qtpl:19
}
//line app/vmalert/provider/alert_manager_request.qtpl:19
qw422016.N().S(`},"annotations": {`)
//line app/vmalert/provider/alert_manager_request.qtpl:22
c := len(alert.Annotations)
//line app/vmalert/provider/alert_manager_request.qtpl:23
for k, v := range alert.Annotations {
//line app/vmalert/provider/alert_manager_request.qtpl:24
c = c - 1
//line app/vmalert/provider/alert_manager_request.qtpl:25
qw422016.N().Q(k)
//line app/vmalert/provider/alert_manager_request.qtpl:25
qw422016.N().S(`:`)
//line app/vmalert/provider/alert_manager_request.qtpl:25
qw422016.N().Q(v)
//line app/vmalert/provider/alert_manager_request.qtpl:25
if c > 0 {
//line app/vmalert/provider/alert_manager_request.qtpl:25
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:25
}
//line app/vmalert/provider/alert_manager_request.qtpl:26
}
//line app/vmalert/provider/alert_manager_request.qtpl:26
qw422016.N().S(`}}`)
//line app/vmalert/provider/alert_manager_request.qtpl:29
if i != len(alerts)-1 {
//line app/vmalert/provider/alert_manager_request.qtpl:29
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:29
}
//line app/vmalert/provider/alert_manager_request.qtpl:30
}
//line app/vmalert/provider/alert_manager_request.qtpl:30
qw422016.N().S(`]`)
//line app/vmalert/provider/alert_manager_request.qtpl:32
}
//line app/vmalert/provider/alert_manager_request.qtpl:32
func writeamRequest(qq422016 qtio422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
//line app/vmalert/provider/alert_manager_request.qtpl:32
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/provider/alert_manager_request.qtpl:32
streamamRequest(qw422016, alerts, generatorURL)
//line app/vmalert/provider/alert_manager_request.qtpl:32
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/provider/alert_manager_request.qtpl:32
}
//line app/vmalert/provider/alert_manager_request.qtpl:32
func amRequest(alerts []Alert, generatorURL func(string, string) string) string {
//line app/vmalert/provider/alert_manager_request.qtpl:32
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/provider/alert_manager_request.qtpl:32
writeamRequest(qb422016, alerts, generatorURL)
//line app/vmalert/provider/alert_manager_request.qtpl:32
qs422016 := string(qb422016.B)
//line app/vmalert/provider/alert_manager_request.qtpl:32
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/provider/alert_manager_request.qtpl:32
return qs422016
//line app/vmalert/provider/alert_manager_request.qtpl:32
}

View File

@@ -1,65 +0,0 @@
package provider
import (
"sort"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
)
// AlertProvider is common interface for alert manager provider
type AlertProvider interface {
Send(alerts []Alert) error
}
// Alert the triggered alert
type Alert struct {
Group string
Name string
Labels []datasource.Label
Annotations map[string]string
Start time.Time
End time.Time
Value float64
}
// AlertsFromMetrics converts metrics to alerts by alert Rule
func AlertsFromMetrics(metrics []datasource.Metric, group string, rule config.Rule) []Alert {
alerts := make([]Alert, 0, len(metrics))
for i, m := range metrics {
a := Alert{
Group: group,
Name: rule.Name,
Labels: metrics[i].Labels,
// todo eval template in annotations
Annotations: rule.Annotations,
Start: time.Unix(m.Timestamp, 0),
}
for k, v := range rule.Labels {
a.Labels = append(a.Labels, datasource.Label{
Name: k,
Value: v,
})
}
a.Labels = removeDuplicated(a.Labels)
alerts = append(alerts, a)
}
return alerts
}
func removeDuplicated(l []datasource.Label) []datasource.Label {
sort.Slice(l, func(i, j int) bool {
return l[i].Name < l[j].Name
})
j := 0
for i := 1; i < len(l); i++ {
if l[j] == l[i] {
continue
}
j++
l[j] = l[i]
}
return l[:j+1]
}

View File

@@ -0,0 +1,187 @@
package remotewrite
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"net/http"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/golang/snappy"
)
// Client is an asynchronous HTTP client for writing
// timeseries via remote write protocol.
type Client struct {
addr string
c *http.Client
input chan prompbmarshal.TimeSeries
baUser, baPass string
flushInterval time.Duration
maxBatchSize int
maxQueueSize int
wg sync.WaitGroup
doneCh chan struct{}
}
type Config struct {
// Addr of remote storage
Addr string
BasicAuthUser string
BasicAuthPass string
// MaxBatchSize defines max number of timeseries
// to be flushed at once
MaxBatchSize int
// MaxQueueSize defines max length of input queue
// populated by Push method
MaxQueueSize int
// FlushInterval defines time interval for flushing batches
FlushInterval time.Duration
// WriteTimeout defines timeout for HTTP write request
// to remote storage
WriteTimeout time.Duration
}
const (
defaultMaxBatchSize = 1e3
defaultMaxQueueSize = 100
defaultFlushInterval = 5 * time.Second
defaultWriteTimeout = 30 * time.Second
)
const writePath = "/api/v1/write"
// NewClient returns asynchronous client for
// writing timeseries via remotewrite protocol.
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.Addr == "" {
return nil, fmt.Errorf("config.Addr can't be empty")
}
if cfg.MaxBatchSize == 0 {
cfg.MaxBatchSize = defaultMaxBatchSize
}
if cfg.MaxQueueSize == 0 {
cfg.MaxQueueSize = defaultMaxQueueSize
}
if cfg.FlushInterval == 0 {
cfg.FlushInterval = defaultFlushInterval
}
if cfg.WriteTimeout == 0 {
cfg.WriteTimeout = defaultWriteTimeout
}
c := &Client{
c: &http.Client{
Timeout: cfg.WriteTimeout,
},
addr: strings.TrimSuffix(cfg.Addr, "/") + writePath,
baUser: cfg.BasicAuthUser,
baPass: cfg.BasicAuthPass,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,
doneCh: make(chan struct{}),
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
}
c.run(ctx)
return c, nil
}
// Push adds timeseries into queue for writing into remote storage.
// Push returns and error if client is stopped or if queue is full.
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
select {
case <-c.doneCh:
return fmt.Errorf("client is closed")
case c.input <- s:
return nil
default:
return fmt.Errorf("failed to push timeseries - queue is full (%d entries)",
c.maxQueueSize)
}
}
// Close stops the client and waits for all goroutines
// to exit.
func (c *Client) Close() error {
if c.doneCh == nil {
return fmt.Errorf("client is already closed")
}
close(c.input)
close(c.doneCh)
c.wg.Wait()
return nil
}
func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
wr := prompbmarshal.WriteRequest{}
shutdown := func() {
for ts := range c.input {
wr.Timeseries = append(wr.Timeseries, ts)
}
lastCtx, cancel := context.WithTimeout(context.Background(), time.Second*10)
c.flush(lastCtx, wr)
cancel()
}
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
wr = prompbmarshal.WriteRequest{}
case ts := <-c.input:
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
wr = prompbmarshal.WriteRequest{}
}
}
}
}()
}
func (c *Client) flush(ctx context.Context, wr prompbmarshal.WriteRequest) {
if len(wr.Timeseries) < 1 {
return
}
data, err := wr.Marshal()
if err != nil {
logger.Errorf("failed to marshal WriteRequest: %s", err)
return
}
req, err := http.NewRequest("POST", c.addr, bytes.NewReader(snappy.Encode(nil, data)))
if err != nil {
logger.Errorf("failed to create new HTTP request: %s", err)
return
}
if c.baPass != "" {
req.SetBasicAuth(c.baUser, c.baPass)
}
resp, err := c.c.Do(req.WithContext(ctx))
if err != nil {
logger.Errorf("error getting response from %s:%s", req.URL, err)
return
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusNoContent {
body, _ := ioutil.ReadAll(resp.Body)
logger.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL, body)
return
}
}

269
app/vmalert/rule.go Normal file
View File

@@ -0,0 +1,269 @@
package main
import (
"context"
"errors"
"fmt"
"hash/fnv"
"sort"
"strconv"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// Group grouping array of alert
type Group struct {
Name string
Rules []*Rule
}
// Rule is basic alert entity
type Rule struct {
Name string `yaml:"alert"`
Expr string `yaml:"expr"`
For time.Duration `yaml:"for"`
Labels map[string]string `yaml:"labels"`
Annotations map[string]string `yaml:"annotations"`
group *Group
// guard status fields
mu sync.RWMutex
// stores list of active alerts
alerts map[uint64]*notifier.Alert
// stores last moment of time Exec was called
lastExecTime time.Time
// stores last error that happened in Exec func
// resets on every successful Exec
// may be used as Health state
lastExecError error
}
// Validate validates rule
func (r *Rule) Validate() error {
if r.Name == "" {
return errors.New("rule name can not be empty")
}
if r.Expr == "" {
return fmt.Errorf("expression for rule %q can't be empty", r.Name)
}
if _, err := metricsql.Parse(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q: %w", r.Name, err)
}
return nil
}
// Exec executes Rule expression via the given Querier.
// Based on the Querier results Rule maintains notifier.Alerts
func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
qMetrics, err := q.Query(ctx, r.Expr)
r.mu.Lock()
defer r.mu.Unlock()
r.lastExecError = err
r.lastExecTime = time.Now()
if err != nil {
return fmt.Errorf("failed to execute query %q: %s", r.Expr, err)
}
for h, a := range r.alerts {
// cleanup inactive alerts from previous Eval
if a.State == notifier.StateInactive {
delete(r.alerts, h)
}
}
updated := make(map[uint64]struct{})
// update list of active alerts
for _, m := range qMetrics {
h := hash(m)
updated[h] = struct{}{}
if _, ok := r.alerts[h]; ok {
continue
}
a, err := r.newAlert(m)
if err != nil {
r.lastExecError = err
return fmt.Errorf("failed to create alert: %s", err)
}
a.ID = h
a.State = notifier.StatePending
r.alerts[h] = a
}
for h, a := range r.alerts {
// if alert wasn't updated in this iteration
// means it is resolved already
if _, ok := updated[h]; !ok {
a.State = notifier.StateInactive
// set endTime to last execution time
// so it can be sent by notifier on next step
a.End = r.lastExecTime
continue
}
if a.State == notifier.StatePending && time.Since(a.Start) >= r.For {
a.State = notifier.StateFiring
alertsFired.Inc()
}
if a.State == notifier.StateFiring {
a.End = r.lastExecTime.Add(3 * *evaluationInterval)
}
}
return nil
}
// TODO: consider hashing algorithm in VM
func hash(m datasource.Metric) uint64 {
hash := fnv.New64a()
labels := m.Labels
sort.Slice(labels, func(i, j int) bool {
return labels[i].Name < labels[j].Name
})
for _, l := range labels {
hash.Write([]byte(l.Name))
hash.Write([]byte(l.Value))
hash.Write([]byte("\xff"))
}
return hash.Sum64()
}
func (r *Rule) newAlert(m datasource.Metric) (*notifier.Alert, error) {
a := &notifier.Alert{
Group: r.group.Name,
Name: r.Name,
Labels: map[string]string{},
Value: m.Value,
Start: time.Now(),
// TODO: support End time
}
// 1. use data labels
for _, l := range m.Labels {
a.Labels[l.Name] = l.Value
}
// 2. template rule labels with data labels
rLabels, err := a.ExecTemplate(r.Labels)
if err != nil {
return a, err
}
// 3. merge data labels and rule labels
// metric labels may be overridden by
// rule labels
for k, v := range rLabels {
a.Labels[k] = v
}
// 4. template merged labels
a.Labels, err = a.ExecTemplate(a.Labels)
if err != nil {
return a, err
}
a.Annotations, err = a.ExecTemplate(r.Annotations)
return a, err
}
// AlertAPI generates APIAlert object from alert by its id(hash)
func (r *Rule) AlertAPI(id uint64) *APIAlert {
r.mu.RLock()
defer r.mu.RUnlock()
a, ok := r.alerts[id]
if !ok {
return nil
}
return r.newAlertAPI(*a)
}
// AlertsAPI generates list of APIAlert objects from existing alerts
func (r *Rule) AlertsAPI() []*APIAlert {
var alerts []*APIAlert
r.mu.RLock()
for _, a := range r.alerts {
alerts = append(alerts, r.newAlertAPI(*a))
}
r.mu.RUnlock()
return alerts
}
func (r *Rule) newAlertAPI(a notifier.Alert) *APIAlert {
return &APIAlert{
ID: a.ID,
Name: a.Name,
Group: a.Group,
Expression: r.Expr,
Labels: a.Labels,
Annotations: a.Annotations,
State: a.State.String(),
ActiveAt: a.Start,
Value: strconv.FormatFloat(a.Value, 'e', -1, 64),
}
}
const (
// AlertMetricName is the metric name for synthetic alert timeseries.
alertMetricName = "ALERTS"
// AlertForStateMetricName is the metric name for 'for' state of alert.
alertForStateMetricName = "ALERTS_FOR_STATE"
// AlertNameLabel is the label name indicating the name of an alert.
alertNameLabel = "alertname"
// AlertStateLabel is the label name indicating the state of an alert.
alertStateLabel = "alertstate"
)
func (r *Rule) AlertToTimeSeries(a *notifier.Alert, timestamp time.Time) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
tss = append(tss, alertToTimeSeries(r.Name, a, timestamp))
if r.For > 0 {
tss = append(tss, alertForToTimeSeries(r.Name, a, timestamp))
}
return tss
}
func alertToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertMetricName
labels[alertNameLabel] = name
labels[alertStateLabel] = a.State.String()
return newTimeSeries(1, labels, timestamp)
}
func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for k, v := range a.Labels {
labels[k] = v
}
labels["__name__"] = alertForStateMetricName
labels[alertNameLabel] = name
return newTimeSeries(float64(a.Start.Unix()), labels, timestamp)
}
func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
ts := prompbmarshal.TimeSeries{}
ts.Samples = append(ts.Samples, prompbmarshal.Sample{
Value: value,
Timestamp: timestamp.UnixNano() / 1e6,
})
keys := make([]string, 0, len(labels))
for k := range labels {
keys = append(keys, k)
}
sort.Strings(keys)
for _, key := range keys {
ts.Labels = append(ts.Labels, prompbmarshal.Label{
Name: key,
Value: labels[key],
})
}
return ts
}

399
app/vmalert/rule_test.go Normal file
View File

@@ -0,0 +1,399 @@
package main
import (
"context"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func TestRule_Validate(t *testing.T) {
if err := (&Rule{}).Validate(); err == nil {
t.Errorf("exptected empty name error")
}
if err := (&Rule{Name: "alert"}).Validate(); err == nil {
t.Errorf("exptected empty expr error")
}
if err := (&Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
t.Errorf("exptected invalid expr error")
}
if err := (&Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
t.Errorf("exptected valid rule got %s", err)
}
}
func TestRule_AlertToTimeSeries(t *testing.T) {
timestamp := time.Now()
testCases := []struct {
rule *Rule
alert *notifier.Alert
expTS []prompbmarshal.TimeSeries
}{
{
newTestRule("instant", 0),
&notifier.Alert{State: notifier.StateFiring},
[]prompbmarshal.TimeSeries{
newTimeSeries(1, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant",
}, timestamp),
},
},
{
newTestRule("instant extra labels", 0),
&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
"job": "foo",
"instance": "bar",
}},
[]prompbmarshal.TimeSeries{
newTimeSeries(1, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant extra labels",
"job": "foo",
"instance": "bar",
}, timestamp),
},
},
{
newTestRule("instant labels override", 0),
&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
alertStateLabel: "foo",
"__name__": "bar",
}},
[]prompbmarshal.TimeSeries{
newTimeSeries(1, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "instant labels override",
}, timestamp),
},
},
{
newTestRule("for", time.Second),
&notifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
[]prompbmarshal.TimeSeries{
newTimeSeries(1, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
alertNameLabel: "for",
}, timestamp),
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
"__name__": alertForStateMetricName,
alertNameLabel: "for",
}, timestamp),
},
},
{
newTestRule("for pending", 10*time.Second),
&notifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
[]prompbmarshal.TimeSeries{
newTimeSeries(1, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StatePending.String(),
alertNameLabel: "for pending",
}, timestamp),
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
"__name__": alertForStateMetricName,
alertNameLabel: "for pending",
}, timestamp),
},
},
}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
tss := tc.rule.AlertToTimeSeries(tc.alert, timestamp)
if len(tc.expTS) != len(tss) {
t.Fatalf("expected number of timeseries %d; got %d", len(tc.expTS), len(tss))
}
for i := range tc.expTS {
expTS, gotTS := tc.expTS[i], tss[i]
if len(expTS.Samples) != len(gotTS.Samples) {
t.Fatalf("expected number of samples %d; got %d", len(expTS.Samples), len(gotTS.Samples))
}
for i, exp := range expTS.Samples {
got := gotTS.Samples[i]
if got.Value != exp.Value {
t.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
}
if got.Timestamp != exp.Timestamp {
t.Errorf("expected timestamp %d; got %d", exp.Timestamp, got.Timestamp)
}
}
if len(expTS.Labels) != len(gotTS.Labels) {
t.Fatalf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
}
for i, exp := range expTS.Labels {
got := gotTS.Labels[i]
if got.Name != exp.Name {
t.Errorf("expected label name %q; got %q", exp.Name, got.Name)
}
if got.Value != exp.Value {
t.Errorf("expected label value %q; got %q", exp.Value, got.Value)
}
}
}
})
}
}
func newTestRule(name string, waitFor time.Duration) *Rule {
return &Rule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor}
}
func TestRule_Exec(t *testing.T) {
testCases := []struct {
rule *Rule
steps [][]datasource.Metric
expAlerts map[uint64]*notifier.Alert
}{
{
newTestRule("empty", 0),
[][]datasource.Metric{},
map[uint64]*notifier.Alert{},
},
{
newTestRule("single-firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
},
},
{
newTestRule("single-firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
},
},
{
newTestRule("single-firing=>inactive=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{},
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
},
},
{
newTestRule("single-firing=>inactive=>firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{},
{metricWithLabels(t, "__name__", "foo")},
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
},
},
{
newTestRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{},
{metricWithLabels(t, "__name__", "foo")},
{},
{},
},
map[uint64]*notifier.Alert{},
},
{
newTestRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{},
{metricWithLabels(t, "__name__", "foo")},
{},
{},
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
},
},
{
newTestRule("multiple-firing", 0),
[][]datasource.Metric{
{
metricWithLabels(t, "__name__", "foo"),
metricWithLabels(t, "__name__", "foo1"),
metricWithLabels(t, "__name__", "foo2"),
},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateFiring},
hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
},
},
{
newTestRule("multiple-steps-firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo1")},
{metricWithLabels(t, "__name__", "foo2")},
},
// 1: fire first alert
// 2: fire second alert, set first inactive
// 3: fire third alert, set second inactive, delete first one
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateInactive},
hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
},
},
{
newTestRule("duplicate", 0),
[][]datasource.Metric{
{
// metrics with the same labelset should result in one alert
metricWithLabels(t, "__name__", "foo", "type", "bar"),
metricWithLabels(t, "type", "bar", "__name__", "foo"),
},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo", "type", "bar")): {State: notifier.StateFiring},
},
},
{
newTestRule("for-pending", time.Minute),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
},
},
{
newTestRule("for-fired", time.Millisecond),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
},
},
{
newTestRule("for-pending=>inactive", time.Millisecond),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
// empty step to reset pending alerts
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
},
},
{
newTestRule("for-pending=>firing=>inactive", time.Millisecond),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
// empty step to reset pending alerts
{},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
},
},
{
newTestRule("for-pending=>firing=>inactive=>pending", time.Millisecond),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
// empty step to reset pending alerts
{},
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
},
},
{
newTestRule("for-pending=>firing=>inactive=>pending=>firing", time.Millisecond),
[][]datasource.Metric{
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
// empty step to reset pending alerts
{},
{metricWithLabels(t, "__name__", "foo")},
{metricWithLabels(t, "__name__", "foo")},
},
map[uint64]*notifier.Alert{
hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
},
},
}
fakeGroup := &Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.group = fakeGroup
for _, step := range tc.steps {
fq.reset()
fq.add(t, step...)
if err := tc.rule.Exec(context.TODO(), fq); err != nil {
t.Fatalf("unexpected err: %s", err)
}
// artificial delay between applying steps
time.Sleep(time.Millisecond)
}
if len(tc.rule.alerts) != len(tc.expAlerts) {
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
}
for key, exp := range tc.expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("expected to have key %d", key)
}
if got.State != exp.State {
t.Fatalf("expected state %d; got %d", exp.State, got.State)
}
}
})
}
}
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
m := datasource.Metric{}
for i := 0; i < len(labels); i += 2 {
m.Labels = append(m.Labels, datasource.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return m
}
type fakeQuerier struct {
metrics []datasource.Metric
}
func (fq *fakeQuerier) reset() {
fq.metrics = fq.metrics[:0]
}
func (fq *fakeQuerier) add(t *testing.T, metrics ...datasource.Metric) {
fq.metrics = append(fq.metrics, metrics...)
}
func (fq fakeQuerier) Query(ctx context.Context, query string) ([]datasource.Metric, error) {
return fq.metrics, nil
}

View File

@@ -1 +0,0 @@
package storage

View File

@@ -0,0 +1,19 @@
groups:
- name: group
rules:
- alert: InvalidAnnotations
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ $value }"
description: "{{$labels}}"
- alert: UnkownAnnotationsFunction
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ value|query }}"
description: "{{$labels}}"

View File

@@ -0,0 +1,13 @@
groups:
- name: duplicatedGroupDiffFiles
rules:
- alert: VMRows
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ $value|humanize }}"
description: "{{$labels}}"

View File

@@ -0,0 +1,22 @@
groups:
- name: sameGroup
rules:
- alert: alert
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"
- name: sameGroup
rules:
- alert: alert
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"

View File

@@ -0,0 +1,13 @@
groups:
- name: duplicatedGroupDiffFiles
rules:
- alert: VMRows
for: 5m
expr: vm_rows > 0
labels:
label: bar
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"

View File

@@ -0,0 +1,11 @@
groups:
- name: group
rules:
- alert: UnkownLabelFunction
for: 5m
expr: vm_rows > 0
labels:
label: bar
summary: "{{ value|query }}"
annotations:
description: "{{$labels}}"

28
app/vmalert/testdata/rules0-bad.rules vendored Normal file
View File

@@ -0,0 +1,28 @@
groups:
- name: group
rules:
- alert: InvalidExpr
for: 5m
expr: vm_rows{ > 0
labels:
label: bar
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"
- alert: EmptyExpr
for: 5m
expr: ""
labels:
label: bar
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"
- alert: ""
for: 5m
expr: vm_rows > 0
labels:
label: foo
annotations:
summary: "{{ $value }}"
description: "{{$labels}}"

23
app/vmalert/testdata/rules0-good.rules vendored Normal file
View File

@@ -0,0 +1,23 @@
groups:
- name: groupGorSingleAlert
rules:
- alert: VMRows
for: 10s
expr: vm_rows > 0
labels:
label: bar
template: "{{ $value|humanize }}"
annotations:
summary: "{{ $value|humanize }}"
description: "{{$labels}}"
- name: TestGroup
rules:
- alert: Conns
expr: sum(vm_tcplistener_conns) by(instance) > 1
annotations:
summary: "Too high connection number for {{$labels.instance}}"
description: "It is {{ $value }} connections for {{$labels.instance}}"
- alert: ExampleAlertAlwaysFiring
expr: sum by(job)
(up == 1)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

134
app/vmalert/web.go Normal file
View File

@@ -0,0 +1,134 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
)
// APIAlert has info for an alert.
type APIAlert struct {
ID uint64 `json:"id"`
Name string `json:"name"`
Group string `json:"group"`
Expression string `json:"expression"`
State string `json:"state"`
Value string `json:"value"`
Labels map[string]string `json:"labels"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
}
type requestHandler struct {
groups []Group
}
var pathList = [][]string{
{"/api/v1/alerts", "list all active alerts"},
{"/api/v1/groupName/alertID/status", "get alert status by ID"},
// /metrics is served by httpserver by default
{"/metrics", "list of application metrics"},
}
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
resph := responseHandler{w}
switch r.URL.Path {
case "/":
for _, path := range pathList {
p, doc := path[0], path[1]
fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
}
return true
case "/api/v1/alerts":
resph.handle(rh.list())
return true
default:
// /api/v1/<groupName>/<alertID>/status
if strings.HasSuffix(r.URL.Path, "/status") {
resph.handle(rh.alert(r.URL.Path))
return true
}
return false
}
}
type listAlertsResponse struct {
Data struct {
Alerts []*APIAlert `json:"alerts"`
} `json:"data"`
Status string `json:"status"`
}
func (rh *requestHandler) list() ([]byte, error) {
lr := listAlertsResponse{Status: "success"}
for _, g := range rh.groups {
for _, r := range g.Rules {
lr.Data.Alerts = append(lr.Data.Alerts, r.AlertsAPI()...)
}
}
// sort list of alerts for deterministic output
sort.Slice(lr.Data.Alerts, func(i, j int) bool {
return lr.Data.Alerts[i].Name < lr.Data.Alerts[j].Name
})
b, err := json.Marshal(lr)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of active alerts: %s`, err),
StatusCode: http.StatusInternalServerError,
}
}
return b, nil
}
func (rh *requestHandler) alert(path string) ([]byte, error) {
parts := strings.SplitN(strings.TrimPrefix(path, "/api/v1/"), "/", 3)
if len(parts) != 3 {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`path %q cointains /status suffix but doesn't match pattern "/group/alert/status"`, path),
StatusCode: http.StatusBadRequest,
}
}
group := strings.TrimRight(parts[0], "/")
idStr := strings.TrimRight(parts[1], "/")
id, err := strconv.ParseUint(idStr, 10, 0)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`cannot parse int from %q`, idStr),
StatusCode: http.StatusBadRequest,
}
}
for _, g := range rh.groups {
if g.Name != group {
continue
}
for i := range g.Rules {
if apiAlert := g.Rules[i].AlertAPI(id); apiAlert != nil {
return json.Marshal(apiAlert)
}
}
}
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`cannot find alert %s in %q`, idStr, group),
StatusCode: http.StatusNotFound,
}
}
// responseHandler wrapper on http.ResponseWriter with sugar
type responseHandler struct{ http.ResponseWriter }
func (w responseHandler) handle(b []byte, err error) {
if err != nil {
httpserver.Errorf(w, "%s", err)
return
}
w.Header().Set("Content-Type", "application/json")
w.Write(b)
}

72
app/vmalert/web_test.go Normal file
View File

@@ -0,0 +1,72 @@
package main
import (
"encoding/json"
"net/http"
"net/http/httptest"
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
func TestHandler(t *testing.T) {
rule := &Rule{
Name: "alert",
alerts: map[uint64]*notifier.Alert{
0: {},
},
}
rh := &requestHandler{
groups: []Group{{
Name: "group",
Rules: []*Rule{rule},
}},
}
getResp := func(url string, to interface{}, code int) {
t.Helper()
resp, err := http.Get(url)
if err != nil {
t.Errorf("unexpected err %s", err)
}
if code != resp.StatusCode {
t.Errorf("unexpected status code %d want %d", resp.StatusCode, code)
}
defer func() {
if err := resp.Body.Close(); err != nil {
t.Errorf("err closing body %s", err)
}
}()
if to != nil {
if err = json.NewDecoder(resp.Body).Decode(to); err != nil {
t.Errorf("unexpected err %s", err)
}
}
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rh.handler(w, r) }))
defer ts.Close()
t.Run("/api/v1/alerts", func(t *testing.T) {
lr := listAlertsResponse{}
getResp(ts.URL+"/api/v1/alerts", &lr, 200)
if length := len(lr.Data.Alerts); length != 1 {
t.Errorf("expected 1 alert got %d", length)
}
})
t.Run("/api/v1/group/0/status", func(t *testing.T) {
alert := &APIAlert{}
getResp(ts.URL+"/api/v1/group/0/status", alert, 200)
expAlert := rule.newAlertAPI(*rule.alerts[0])
if !reflect.DeepEqual(alert, expAlert) {
t.Errorf("expected %v is equal to %v", alert, expAlert)
}
})
t.Run("/api/v1/group/1/status", func(t *testing.T) {
getResp(ts.URL+"/api/v1/group/1/status", nil, 404)
})
t.Run("/api/v1/unknown-group/0/status", func(t *testing.T) {
getResp(ts.URL+"/api/v1/unknown-group/0/status", nil, 404)
})
t.Run("/", func(t *testing.T) {
getResp(ts.URL, nil, 200)
})
}

View File

@@ -140,14 +140,28 @@ Run `vmbackup -help` in order to see all the available options:
-dst string
Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
-envflag.enable
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-maxBytesPerSecond int
The maximum upload speed. There is no limit if it is set to 0
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
-origin string
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
-snapshot.createURL string
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup.Example: http://victoriametrics:8428/snaphsot/create
-snapshot.deleteURL string
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted.Example: http://victoriametrics:8428/snaphsot/delete
-snapshotName string
Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
-storageDataPath string
@@ -164,7 +178,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmbackup` from the root folder of the repository.
It builds `vmbackup` binary and puts it into the `bin` folder.
@@ -179,3 +193,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmbackup
```

View File

@@ -1,7 +1,6 @@
ARG certs_image
FROM $certs_image AS certs
FROM scratch
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ARG base_image
FROM $base_image
ENTRYPOINT ["/vmbackup-prod"]
ARG src_binary
COPY $src_binary ./vmbackup-prod
ENTRYPOINT ["/vmbackup-prod"]

View File

@@ -4,7 +4,9 @@ import (
"flag"
"fmt"
"os"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmbackup/snapshot"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
@@ -14,9 +16,13 @@ import (
)
var (
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
snapshotName = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
snapshotCreateURL = flag.String("snapshot.createURL", "", "VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup."+
"Example: http://victoriametrics:8428/snaphsot/create")
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted."+
"Example: http://victoriametrics:8428/snaphsot/delete")
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
@@ -29,6 +35,34 @@ func main() {
envflag.Parse()
buildinfo.Init()
if len(*snapshotCreateURL) > 0 {
logger.Infof("%s", "Snapshots enabled")
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
if len(*snapshotDeleteURL) <= 0 {
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
if err != nil {
logger.Fatalf("Failed to set snapshot.deleteURL flag: %v", err)
}
}
logger.Infof("Snapshot delete url %s", *snapshotDeleteURL)
name, err := snapshot.Create(*snapshotCreateURL)
if err != nil {
logger.Fatalf("%s", err)
}
err = flag.Set("snapshotName", name)
if err != nil {
logger.Fatalf("Failed to set snapshotName flag: %v", err)
}
defer func() {
err := snapshot.Delete(*snapshotDeleteURL, name)
if err != nil {
logger.Fatalf("%s", err)
}
}()
}
srcFS, err := newSrcFS()
if err != nil {
logger.Fatalf("%s", err)
@@ -67,7 +101,7 @@ See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/a
func newSrcFS() (*fslocal.FS, error) {
if len(*snapshotName) == 0 {
return nil, fmt.Errorf("`-snapshotName` cannot be empty")
return nil, fmt.Errorf("`-snapshotName` or `-snapshot.createURL` must be provided")
}
snapshotPath := *storageDataPath + "/snapshots/" + *snapshotName

View File

@@ -0,0 +1,91 @@
package snapshot
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type snapshot struct {
Status string `json:"status"`
Snapshot string `json:"snapshot"`
Msg string `json:"msg"`
}
// Create creates a snapshot and the provided api endpoint and returns
// the snapshot name
func Create(createSnapshotURL string) (string, error) {
logger.Infof("%s", "Creating snapshot")
u, err := url.Parse(createSnapshotURL)
if err != nil {
return "", err
}
resp, err := http.Get(u.String())
if err != nil {
return "", err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
snap := snapshot{}
err = json.Unmarshal(body, &snap)
if err != nil {
return "", err
}
if snap.Status == "ok" {
logger.Infof("Snapshot %s created", snap.Snapshot)
return snap.Snapshot, nil
} else if snap.Status == "error" {
return "", errors.New(snap.Msg)
} else {
return "", fmt.Errorf("Unkown status: %v", snap.Status)
}
}
// Delete deletes a snapshot and the provided api endpoint returns any failure
func Delete(deleteSnapshotURL string, snapshotName string) error {
logger.Infof("Deleting snapshot %s", snapshotName)
formData := url.Values{
"snapshot": {snapshotName},
}
u, err := url.Parse(deleteSnapshotURL)
if err != nil {
return err
}
resp, err := http.PostForm(u.String(), formData)
if err != nil {
return err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
snap := snapshot{}
err = json.Unmarshal(body, &snap)
if err != nil {
return err
}
if snap.Status == "ok" {
logger.Infof("Snapshot %s deleted", snapshotName)
return nil
} else if snap.Status == "error" {
return errors.New(snap.Msg)
} else {
return fmt.Errorf("Unkown status: %v", snap.Status)
}
}

View File

@@ -0,0 +1,106 @@
package snapshot
import (
"io"
"net/http"
"net/http/httptest"
"testing"
)
func TestCreateSnapshot(t *testing.T) {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/snapshot/create" {
_, err := io.WriteString(w, `{"status":"ok","snapshot":"mysnapshot"}`)
if err != nil {
t.Fatalf("Failed to write response output: %v", err)
}
} else {
t.Fatalf("Invalid path, got %v", r.URL.Path)
}
})
server := httptest.NewServer(http.HandlerFunc(handler))
defer server.Close()
snapshotName, err := Create(server.URL + "/snapshot/create")
if err != nil {
t.Fatalf("Failed taking snapshot: %v", err)
}
if snapshotName != "mysnapshot" {
t.Fatalf("Snapshot name is not correct, got %v", snapshotName)
}
}
func TestCreateSnapshotFailed(t *testing.T) {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/snapshot/create" {
_, err := io.WriteString(w, `{"status":"error","msg":"I am unwell"}`)
if err != nil {
t.Fatalf("Failed to write response output: %v", err)
}
} else {
t.Fatalf("Invalid path, got %v", r.URL.Path)
}
})
server := httptest.NewServer(http.HandlerFunc(handler))
defer server.Close()
snapshotName, err := Create(server.URL + "/snapshot/create")
if err == nil {
t.Fatalf("Snapshot did not fail, got snapshot: %v", snapshotName)
}
}
func TestDeleteSnapshot(t *testing.T) {
snapshotName := "mysnapshot"
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/snapshot/delete" {
_, err := io.WriteString(w, `{"status":"ok"}`)
if err != nil {
t.Fatalf("Failed to write response output: %v", err)
}
} else {
t.Fatalf("Invalid path, got %v", r.URL.Path)
}
if r.FormValue("snapshot") != snapshotName {
t.Fatalf("Invalid snapshot name, got %v", snapshotName)
}
})
server := httptest.NewServer(http.HandlerFunc(handler))
defer server.Close()
err := Delete(server.URL+"/snapshot/delete", snapshotName)
if err != nil {
t.Fatalf("Failed to delete snapshot: %v", err)
}
}
func TestDeleteSnapshotFailed(t *testing.T) {
snapshotName := "mysnapshot"
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/snapshot/delete" {
_, err := io.WriteString(w, `{"status":"error", "msg":"failed to delete"}`)
if err != nil {
t.Fatalf("Failed to write response output: %v", err)
}
} else {
t.Fatalf("Invalid path, got %v", r.URL.Path)
}
if r.FormValue("snapshot") != snapshotName {
t.Fatalf("Invalid snapshot name, got %v", snapshotName)
}
})
server := httptest.NewServer(http.HandlerFunc(handler))
defer server.Close()
err := Delete(server.URL+"/snapshot/delete", snapshotName)
if err == nil {
t.Fatalf("Snapshot should have failed, got: %v", err)
}
}

View File

@@ -47,12 +47,24 @@ Run `vmrestore -help` in order to see all the available options:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-envflag.enable
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-maxBytesPerSecond int
The maximum download speed. There is no limit if it is set to 0
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
-skipBackupCompleteCheck
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
-src string
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-storageDataPath string
@@ -69,7 +81,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmrestore` from the root folder of the repository.
It builds `vmrestore` binary and puts it into the `bin` folder.
@@ -84,3 +96,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmrestore
```

View File

@@ -1,7 +1,6 @@
ARG certs_image
FROM $certs_image AS certs
FROM scratch
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ARG base_image
FROM $base_image
ENTRYPOINT ["/vmrestore-prod"]
ARG src_binary
COPY $src_binary ./vmrestore-prod
ENTRYPOINT ["/vmrestore-prod"]

View File

@@ -8,11 +8,9 @@ import (
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
@@ -24,7 +22,7 @@ var (
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
"It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
resetCacheAuthKey = flag.String("search.resetCacheAuthKey", "", "Optional authKey for resetting rollup cache via /internal/resetCache call")
resetCacheAuthKey = flag.String("search.resetCacheAuthKey", "", "Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call")
)
func getDefaultMaxConcurrentRequests() int {
@@ -43,9 +41,6 @@ func getDefaultMaxConcurrentRequests() int {
// Init initializes vmselect
func Init() {
tmpDirPath := *vmstorage.DataPath + "/tmp"
fs.RemoveDirContents(tmpDirPath)
netstorage.InitTmpBlocksDir(tmpDirPath)
promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
@@ -179,6 +174,14 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
return true
case "/api/v1/status/tsdb":
tsdbStatusRequests.Inc()
if err := prometheus.TSDBStatusHandler(startTime, w, r); err != nil {
tsdbStatusErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/export":
exportRequests.Inc()
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
@@ -191,7 +194,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
federateRequests.Inc()
if err := prometheus.FederateHandler(startTime, w, r); err != nil {
federateErrors.Inc()
httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
@@ -233,7 +236,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
logger.Errorf("error in %q: %s", r.RequestURI, err)
logger.Warnf("error in %q: %s", r.RequestURI, err)
w.Header().Set("Content-Type", "application/json")
statusCode := http.StatusUnprocessableEntity
@@ -266,6 +269,9 @@ var (
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)
tsdbStatusRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/tsdb"}`)
tsdbStatusErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/tsdb"}`)
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)

View File

@@ -53,9 +53,8 @@ type Results struct {
fetchData bool
deadline Deadline
tbf *tmpBlocksFile
packedTimeseries []packedTimeseries
sr *storage.Search
}
// Len returns the number of results in rss.
@@ -65,8 +64,12 @@ func (rss *Results) Len() int {
// Cancel cancels rss work.
func (rss *Results) Cancel() {
putTmpBlocksFile(rss.tbf)
rss.tbf = nil
rss.mustClose()
}
func (rss *Results) mustClose() {
putStorageSearch(rss.sr)
rss.sr = nil
}
// RunParallel runs in parallel f for all the results from rss.
@@ -76,10 +79,7 @@ func (rss *Results) Cancel() {
//
// rss becomes unusable after the call to RunParallel.
func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
defer func() {
putTmpBlocksFile(rss.tbf)
rss.tbf = nil
}()
defer rss.mustClose()
workersCount := 1 + len(rss.packedTimeseries)/32
if workersCount > gomaxprocs {
@@ -106,7 +106,7 @@ func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
break
}
if err = pts.Unpack(rss.tbf, rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
if err = pts.Unpack(rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
break
}
if len(rs.Timestamps) == 0 && rss.fetchData {
@@ -156,18 +156,18 @@ var gomaxprocs = runtime.GOMAXPROCS(-1)
type packedTimeseries struct {
metricName string
addrs []tmpBlockAddr
brs []storage.BlockRef
}
// Unpack unpacks pts to dst.
func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.TimeRange, fetchData bool, maxWorkersCount int) error {
func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData bool, maxWorkersCount int) error {
dst.reset()
if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %s", pts.metricName, err)
}
workersCount := 1 + len(pts.addrs)/32
workersCount := 1 + len(pts.brs)/32
if workersCount > maxWorkersCount {
workersCount = maxWorkersCount
}
@@ -175,19 +175,19 @@ func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.
logger.Panicf("BUG: workersCount cannot be zero")
}
sbs := make([]*sortBlock, 0, len(pts.addrs))
sbs := make([]*sortBlock, 0, len(pts.brs))
var sbsLock sync.Mutex
workCh := make(chan tmpBlockAddr, workersCount)
workCh := make(chan storage.BlockRef, workersCount)
doneCh := make(chan error)
// Start workers
for i := 0; i < workersCount; i++ {
go func() {
var err error
for addr := range workCh {
for br := range workCh {
sb := getSortBlock()
if err = sb.unpackFrom(tbf, addr, tr, fetchData); err != nil {
if err = sb.unpackFrom(br, tr, fetchData); err != nil {
break
}
@@ -204,10 +204,10 @@ func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.
}
// Feed workers with work
for _, addr := range pts.addrs {
workCh <- addr
for _, br := range pts.brs {
workCh <- br
}
pts.addrs = pts.addrs[:0]
pts.brs = pts.brs[:0]
close(workCh)
// Wait until workers finish
@@ -314,8 +314,8 @@ func (sb *sortBlock) reset() {
sb.NextIdx = 0
}
func (sb *sortBlock) unpackFrom(tbf *tmpBlocksFile, addr tmpBlockAddr, tr storage.TimeRange, fetchData bool) error {
tbf.MustReadBlockAt(&sb.b, addr)
func (sb *sortBlock) unpackFrom(br storage.BlockRef, tr storage.TimeRange, fetchData bool) error {
br.MustReadBlock(&sb.b, fetchData)
if fetchData {
if err := sb.b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block: %s", err)
@@ -449,6 +449,15 @@ func GetLabelEntries(deadline Deadline) ([]storage.TagEntry, error) {
return labelEntries, nil
}
// GetTSDBStatusForDate returns tsdb status according to https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
func GetTSDBStatusForDate(deadline Deadline, date uint64, topN int) (*storage.TSDBStatus, error) {
status, err := vmstorage.GetTSDBStatusForDate(date, topN)
if err != nil {
return nil, fmt.Errorf("error during tsdb status request: %s", err)
}
return status, nil
}
// GetSeriesCount returns the number of unique series.
func GetSeriesCount(deadline Deadline) (uint64, error) {
n, err := vmstorage.GetSeriesCount()
@@ -474,6 +483,8 @@ func putStorageSearch(sr *storage.Search) {
var ssPool sync.Pool
// ProcessSearchQuery performs sq on storage nodes until the given deadline.
//
// Results.RunParallel or Results.Cancel must be called on the returned Results.
func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadline) (*Results, error) {
// Setup search.
tfss, err := setupTfss(sq.TagFilterss)
@@ -489,60 +500,40 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
defer vmstorage.WG.Done()
sr := getStorageSearch()
defer putStorageSearch(sr)
sr.Init(vmstorage.Storage, tfss, tr, fetchData, *maxMetricsPerSearch)
sr.Init(vmstorage.Storage, tfss, tr, *maxMetricsPerSearch)
tbf := getTmpBlocksFile()
m := make(map[string][]tmpBlockAddr)
m := make(map[string][]storage.BlockRef)
var orderedMetricNames []string
blocksRead := 0
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
for sr.NextMetricBlock() {
blocksRead++
bb.B = storage.MarshalBlock(bb.B[:0], sr.MetricBlock.Block)
addr, err := tbf.WriteBlockData(bb.B)
if err != nil {
putTmpBlocksFile(tbf)
return nil, fmt.Errorf("cannot write data block #%d to temporary blocks file: %s", blocksRead, err)
}
if time.Until(deadline.Deadline) < 0 {
putTmpBlocksFile(tbf)
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
}
metricName := sr.MetricBlock.MetricName
m[string(metricName)] = append(m[string(metricName)], addr)
metricName := sr.MetricBlockRef.MetricName
brs := m[string(metricName)]
if len(brs) == 0 {
orderedMetricNames = append(orderedMetricNames, string(metricName))
}
m[string(metricName)] = append(brs, *sr.MetricBlockRef.BlockRef)
}
if err := sr.Error(); err != nil {
putTmpBlocksFile(tbf)
return nil, fmt.Errorf("search error after reading %d data blocks: %s", blocksRead, err)
}
if err := tbf.Finalize(); err != nil {
putTmpBlocksFile(tbf)
return nil, fmt.Errorf("cannot finalize temporary blocks file with %d blocks: %s", blocksRead, err)
}
var rss Results
rss.packedTimeseries = make([]packedTimeseries, len(m))
rss.tr = tr
rss.fetchData = fetchData
rss.deadline = deadline
rss.tbf = tbf
i := 0
for metricName, addrs := range m {
pts := &rss.packedTimeseries[i]
i++
pts.metricName = metricName
pts.addrs = addrs
pts := make([]packedTimeseries, len(orderedMetricNames))
for i, metricName := range orderedMetricNames {
pts[i] = packedTimeseries{
metricName: metricName,
brs: m[metricName],
}
}
// Sort rss.packedTimeseries by the first addr offset in order
// to reduce the number of disk seeks during unpacking in RunParallel.
// In this case tmpBlocksFile must be read almost sequentially.
sort.Slice(rss.packedTimeseries, func(i, j int) bool {
pts := rss.packedTimeseries
return pts[i].addrs[0].offset < pts[j].addrs[0].offset
})
rss.packedTimeseries = pts
rss.sr = sr
return &rss, nil
}
@@ -576,6 +567,7 @@ func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error)
}
}
tfss = append(tfss, tfs)
tfss = append(tfss, tfs.Finalize()...)
}
return tfss, nil
}

View File

@@ -1,185 +0,0 @@
package netstorage
import (
"fmt"
"io/ioutil"
"os"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
// InitTmpBlocksDir initializes directory to store temporary search results.
//
// It stores data in system-defined temporary directory if tmpDirPath is empty.
func InitTmpBlocksDir(tmpDirPath string) {
if len(tmpDirPath) == 0 {
tmpDirPath = os.TempDir()
}
tmpBlocksDir = tmpDirPath + "/searchResults"
fs.MustRemoveAll(tmpBlocksDir)
if err := fs.MkdirAllIfNotExist(tmpBlocksDir); err != nil {
logger.Panicf("FATAL: cannot create %q: %s", tmpBlocksDir, err)
}
}
var tmpBlocksDir string
func maxInmemoryTmpBlocksFile() int {
mem := memory.Allowed()
maxLen := mem / 1024
if maxLen < 64*1024 {
return 64 * 1024
}
if maxLen > 4*1024*1024 {
return 4 * 1024 * 1024
}
return maxLen
}
var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
return float64(maxInmemoryTmpBlocksFile())
})
type tmpBlocksFile struct {
buf []byte
f *os.File
r *fs.ReaderAt
offset uint64
}
func getTmpBlocksFile() *tmpBlocksFile {
v := tmpBlocksFilePool.Get()
if v == nil {
return &tmpBlocksFile{
buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
}
}
return v.(*tmpBlocksFile)
}
func putTmpBlocksFile(tbf *tmpBlocksFile) {
tbf.MustClose()
tbf.buf = tbf.buf[:0]
tbf.f = nil
tbf.r = nil
tbf.offset = 0
tmpBlocksFilePool.Put(tbf)
}
var tmpBlocksFilePool sync.Pool
type tmpBlockAddr struct {
offset uint64
size int
}
func (addr tmpBlockAddr) String() string {
return fmt.Sprintf("offset %d, size %d", addr.offset, addr.size)
}
var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)
// WriteBlockData writes b to tbf.
//
// It returns errors since the operation may fail on space shortage
// and this must be handled.
func (tbf *tmpBlocksFile) WriteBlockData(b []byte) (tmpBlockAddr, error) {
var addr tmpBlockAddr
addr.offset = tbf.offset
addr.size = len(b)
tbf.offset += uint64(addr.size)
if len(tbf.buf)+len(b) <= cap(tbf.buf) {
// Fast path - the data fits tbf.buf
tbf.buf = append(tbf.buf, b...)
return addr, nil
}
// Slow path: flush the data from tbf.buf to file.
if tbf.f == nil {
f, err := ioutil.TempFile(tmpBlocksDir, "")
if err != nil {
return addr, err
}
tbf.f = f
tmpBlocksFilesCreated.Inc()
}
_, err := tbf.f.Write(tbf.buf)
tbf.buf = append(tbf.buf[:0], b...)
if err != nil {
return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
}
return addr, nil
}
func (tbf *tmpBlocksFile) Finalize() error {
if tbf.f == nil {
return nil
}
fname := tbf.f.Name()
if _, err := tbf.f.Write(tbf.buf); err != nil {
return fmt.Errorf("cannot write the remaining %d bytes to %q: %s", len(tbf.buf), fname, err)
}
tbf.buf = tbf.buf[:0]
r, err := fs.OpenReaderAt(fname)
if err != nil {
logger.Panicf("FATAL: cannot open %q: %s", fname, err)
}
// Hint the OS that the file is read almost sequentiallly.
// This should reduce the number of disk seeks, which is important
// for HDDs.
r.MustFadviseSequentialRead(true)
tbf.r = r
return nil
}
func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
var buf []byte
if tbf.f == nil {
buf = tbf.buf[addr.offset : addr.offset+uint64(addr.size)]
} else {
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
bb.B = bytesutil.Resize(bb.B, addr.size)
tbf.r.MustReadAt(bb.B, int64(addr.offset))
buf = bb.B
}
tail, err := storage.UnmarshalBlock(dst, buf)
if err != nil {
logger.Panicf("FATAL: cannot unmarshal data at %s: %s", addr, err)
}
if len(tail) > 0 {
logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling data at %s; len(tail)=%d", addr, len(tail))
}
}
var tmpBufPool bytesutil.ByteBufferPool
func (tbf *tmpBlocksFile) MustClose() {
if tbf.f == nil {
return
}
if tbf.r != nil {
// tbf.r could be nil if Finalize wasn't called.
tbf.r.MustClose()
}
fname := tbf.f.Name()
// Remove the file at first, then close it.
// This way the OS shouldn't try to flush file contents to storage
// on close.
if err := os.Remove(fname); err != nil {
logger.Panicf("FATAL: cannot remove %q: %s", fname, err)
}
if err := tbf.f.Close(); err != nil {
logger.Panicf("FATAL: cannot close %q: %s", fname, err)
}
tbf.f = nil
}

View File

@@ -1,153 +0,0 @@
package netstorage
import (
"fmt"
"math/rand"
"os"
"reflect"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
func TestMain(m *testing.M) {
rand.Seed(time.Now().UnixNano())
tmpDir := "TestTmpBlocks"
InitTmpBlocksDir(tmpDir)
statusCode := m.Run()
if err := os.RemoveAll(tmpDir); err != nil {
logger.Panicf("cannot remove %q: %s", tmpDir, err)
}
os.Exit(statusCode)
}
func TestTmpBlocksFileSerial(t *testing.T) {
if err := testTmpBlocksFile(); err != nil {
t.Fatalf("unexpected error: %s", err)
}
}
func TestTmpBlocksFileConcurrent(t *testing.T) {
concurrency := 3
ch := make(chan error, concurrency)
for i := 0; i < concurrency; i++ {
go func() {
ch <- testTmpBlocksFile()
}()
}
for i := 0; i < concurrency; i++ {
select {
case err := <-ch:
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
case <-time.After(30 * time.Second):
t.Fatalf("timeout")
}
}
}
func testTmpBlocksFile() error {
createBlock := func() *storage.Block {
rowsCount := rand.Intn(8000) + 1
var timestamps, values []int64
ts := int64(rand.Intn(1023434))
for i := 0; i < rowsCount; i++ {
ts += int64(rand.Intn(1000) + 1)
timestamps = append(timestamps, ts)
values = append(values, int64(i*i+rand.Intn(20)))
}
tsid := &storage.TSID{
MetricID: 234211,
}
scale := int16(rand.Intn(123))
precisionBits := uint8(rand.Intn(63) + 1)
var b storage.Block
b.Init(tsid, timestamps, values, scale, precisionBits)
_, _, _ = b.MarshalData(0, 0)
return &b
}
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
err := func() error {
tbf := getTmpBlocksFile()
defer putTmpBlocksFile(tbf)
// Write blocks until their summary size exceeds `size`.
var addrs []tmpBlockAddr
var blocks []*storage.Block
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
for tbf.offset < uint64(size) {
b := createBlock()
bb.B = storage.MarshalBlock(bb.B[:0], b)
addr, err := tbf.WriteBlockData(bb.B)
if err != nil {
return fmt.Errorf("cannot write block at offset %d: %s", tbf.offset, err)
}
if addr.offset+uint64(addr.size) != tbf.offset {
return fmt.Errorf("unexpected addr=%+v for offset %v", &addr, tbf.offset)
}
addrs = append(addrs, addr)
blocks = append(blocks, b)
}
if err := tbf.Finalize(); err != nil {
return fmt.Errorf("cannot finalize tbf: %s", err)
}
// Read blocks in parallel and verify them
concurrency := 2
workCh := make(chan int)
doneCh := make(chan error)
for i := 0; i < concurrency; i++ {
go func() {
doneCh <- func() error {
var b1 storage.Block
for idx := range workCh {
addr := addrs[idx]
b := blocks[idx]
if err := b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal data from the original block: %s", err)
}
b1.Reset()
tbf.MustReadBlockAt(&b1, addr)
if err := b1.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal data from tbf: %s", err)
}
if b1.RowsCount() != b.RowsCount() {
return fmt.Errorf("unexpected number of rows in tbf block; got %d; want %d", b1.RowsCount(), b.RowsCount())
}
if !reflect.DeepEqual(b1.Timestamps(), b.Timestamps()) {
return fmt.Errorf("unexpected timestamps; got\n%v\nwant\n%v", b1.Timestamps(), b.Timestamps())
}
if !reflect.DeepEqual(b1.Values(), b.Values()) {
return fmt.Errorf("unexpected values; got\n%v\nwant\n%v", b1.Values(), b.Values())
}
}
return nil
}()
}()
}
for i := range addrs {
workCh <- i
}
close(workCh)
for i := 0; i < concurrency; i++ {
select {
case err := <-doneCh:
if err != nil {
return err
}
case <-time.After(time.Second):
return fmt.Errorf("timeout")
}
}
return nil
}()
if err != nil {
return err
}
}
return nil
}

View File

@@ -30,7 +30,12 @@ var (
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for search query execution")
maxQueryLen = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
maxLookback = flag.Duration("search.maxLookback", 0, "Synonim to -search.lookback-delta from Prometheus. "+
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg")
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. "+
"See also '-search.maxStalenessInterval' flag, which has the same meaining due to historical reasons")
maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+
"By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning "+
"Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. "+
"See also '-search.maxLookback' flag, which has the same meanining due to historical reasons")
)
// Default step used if not set.
@@ -372,7 +377,6 @@ func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
if err != nil {
return fmt.Errorf(`cannot obtain label entries: %s`, err)
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsCountResponse(w, labelEntries)
labelsCountDuration.UpdateDuration(startTime)
@@ -381,6 +385,52 @@ func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels/count"}`)
const secsPerDay = 3600 * 24
// TSDBStatusHandler processes /api/v1/status/tsdb request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := getDeadlineForQuery(r)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %s", err)
}
date := time.Now().Unix() / secsPerDay
dateStr := r.FormValue("date")
if len(dateStr) > 0 {
t, err := time.Parse("2006-01-02", dateStr)
if err != nil {
return fmt.Errorf("cannot parse `date` arg %q: %s", dateStr, err)
}
date = t.Unix() / secsPerDay
}
topN := 10
topNStr := r.FormValue("topN")
if len(topNStr) > 0 {
n, err := strconv.Atoi(topNStr)
if err != nil {
return fmt.Errorf("cannot parse `topN` arg %q: %s", topNStr, err)
}
if n <= 0 {
n = 1
}
if n > 1000 {
n = 1000
}
topN = n
}
status, err := netstorage.GetTSDBStatusForDate(deadline, uint64(date), topN)
if err != nil {
return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %s`, date, topN, err)
}
w.Header().Set("Content-Type", "application/json")
WriteTSDBStatusResponse(w, status)
tsdbStatusDuration.UpdateDuration(startTime)
return nil
}
var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/tsdb"}`)
// LabelsHandler processes /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
@@ -576,20 +626,23 @@ func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) e
if err != nil {
return err
}
queryOffset := getLatencyOffsetMilliseconds()
step, err := getDuration(r, "step", queryOffset)
if err != nil {
return err
}
deadline := getDeadlineForQuery(r)
lookbackDelta, err := getMaxLookback(r)
if err != nil {
return err
}
step, err := getDuration(r, "step", lookbackDelta)
if err != nil {
return err
}
if step <= 0 {
step = defaultStep
}
deadline := getDeadlineForQuery(r)
if len(query) > *maxQueryLen {
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
}
queryOffset := getLatencyOffsetMilliseconds()
if !getBool(r, "nocache") && ct-start < queryOffset {
// Adjust start time only if `nocache` arg isn't set.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/241
@@ -840,7 +893,15 @@ func getTime(r *http.Request, argKey string, defaultValue int64) (int64, error)
case prometheusMaxTimeFormatted:
return maxTimeMsecs, nil
}
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
// Try parsing duration relative to the current time
d, err1 := time.ParseDuration(argValue)
if err1 != nil {
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
}
if d > 0 {
d = -d
}
t = time.Now().Add(d)
}
secs = float64(t.UnixNano()) / 1e9
}
@@ -891,17 +952,20 @@ func getDuration(r *http.Request, argKey string, defaultValue int64) (int64, err
const maxDurationMsecs = 100 * 365 * 24 * 3600 * 1000
func getMaxLookback(r *http.Request) (int64, error) {
d := int64(*maxLookback / time.Millisecond)
d := maxLookback.Milliseconds()
if d == 0 {
d = maxStalenessInterval.Milliseconds()
}
return getDuration(r, "max_lookback", d)
}
func getDeadlineForQuery(r *http.Request) netstorage.Deadline {
dMax := int64(maxQueryDuration.Seconds() * 1e3)
dMax := maxQueryDuration.Milliseconds()
return getDeadlineWithMaxDuration(r, dMax, "-search.maxQueryDuration")
}
func getDeadlineForExport(r *http.Request) netstorage.Deadline {
dMax := int64(maxExportDuration.Seconds() * 1e3)
dMax := maxExportDuration.Milliseconds()
return getDeadlineWithMaxDuration(r, dMax, "-search.maxExportDuration")
}
@@ -944,7 +1008,7 @@ func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error)
}
func getLatencyOffsetMilliseconds() int64 {
d := int64(*latencyOffset / time.Millisecond)
d := latencyOffset.Milliseconds()
if d <= 1000 {
d = 1000
}

View File

@@ -0,0 +1,28 @@
{% import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" %}
{% stripspace %}
TSDBStatusResponse generates response for /api/v1/status/tsdb .
{% func TSDBStatusResponse(status *storage.TSDBStatus) %}
{
"status":"success",
"data":{
"seriesCountByMetricName":{%= tsdbStatusEntries(status.SeriesCountByMetricName) %},
"labelValueCountByLabelName":{%= tsdbStatusEntries(status.LabelValueCountByLabelName) %},
"seriesCountByLabelValuePair":{%= tsdbStatusEntries(status.SeriesCountByLabelValuePair) %}
}
}
{% endfunc %}
{% func tsdbStatusEntries(a []storage.TopHeapEntry) %}
[
{% for i, e := range a %}
{
"name":{%q= e.Name %},
"value":{%d= int(e.Count) %}
}
{% if i+1 < len(a) %},{% endif %}
{% endfor %}
]
{% endfunc %}
{% endstripspace %}

View File

@@ -0,0 +1,123 @@
// Code generated by qtc from "tsdb_status_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmselect/prometheus/tsdb_status_response.qtpl:1
package prometheus
//line app/vmselect/prometheus/tsdb_status_response.qtpl:1
import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
// TSDBStatusResponse generates response for /api/v1/status/tsdb .
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
func StreamTSDBStatusResponse(qw422016 *qt422016.Writer, status *storage.TSDBStatus) {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:5
qw422016.N().S(`{"status":"success","data":{"seriesCountByMetricName":`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
streamtsdbStatusEntries(qw422016, status.SeriesCountByMetricName)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
qw422016.N().S(`,"labelValueCountByLabelName":`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:10
streamtsdbStatusEntries(qw422016, status.LabelValueCountByLabelName)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:10
qw422016.N().S(`,"seriesCountByLabelValuePair":`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:11
streamtsdbStatusEntries(qw422016, status.SeriesCountByLabelValuePair)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:11
qw422016.N().S(`}}`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
func WriteTSDBStatusResponse(qq422016 qtio422016.Writer, status *storage.TSDBStatus) {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
StreamTSDBStatusResponse(qw422016, status)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
func TSDBStatusResponse(status *storage.TSDBStatus) string {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
WriteTSDBStatusResponse(qb422016, status)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
return qs422016
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
func streamtsdbStatusEntries(qw422016 *qt422016.Writer, a []storage.TopHeapEntry) {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
qw422016.N().S(`[`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
for i, e := range a {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
qw422016.N().S(`{"name":`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:20
qw422016.N().Q(e.Name)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:20
qw422016.N().S(`,"value":`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
qw422016.N().D(int(e.Count))
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
qw422016.N().S(`}`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
if i+1 < len(a) {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
qw422016.N().S(`,`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
qw422016.N().S(`]`)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
func writetsdbStatusEntries(qq422016 qtio422016.Writer, a []storage.TopHeapEntry) {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
streamtsdbStatusEntries(qw422016, a)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
}
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
func tsdbStatusEntries(a []storage.TopHeapEntry) string {
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
writetsdbStatusEntries(qb422016, a)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
return qs422016
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
}

View File

@@ -338,7 +338,7 @@ func TestExecSuccess(t *testing.T) {
q := `timestamp(123)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -349,7 +349,7 @@ func TestExecSuccess(t *testing.T) {
q := `timestamp(time())`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -360,7 +360,7 @@ func TestExecSuccess(t *testing.T) {
q := `timestamp(456/time()+123)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{900, 1100, 1300, 1500, 1700, 1900},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
@@ -371,7 +371,7 @@ func TestExecSuccess(t *testing.T) {
q := `timestamp(time()>=1600)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 1600, 1800, 2000},
Values: []float64{nan, nan, nan, nan, 1700, 1900},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}

View File

@@ -15,9 +15,9 @@ import (
"github.com/valyala/histogram"
)
var maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+
"By default it is automatically calculated from the median interval between samples. This flag can be useful for tuning "+
"Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details")
var minStalenessInterval = flag.Duration("search.minStalenessInterval", 0, "The mimimum interval for staleness calculations. "+
"This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. "+
"See also '-search.maxStalenessInterval'")
var rollupFuncs = map[string]newRollupFunc{
// Standard rollup funcs from PromQL.
@@ -72,6 +72,11 @@ var rollupFuncs = map[string]newRollupFunc{
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
// `timestamp` function must return timestamp for the last datapoint on the current window
// in order to properly handle offset and timestamps unaligned to the current step.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details.
"timestamp": newRollupFuncOneArg(rollupTimestamp),
}
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
@@ -153,6 +158,8 @@ var rollupFuncsKeepMetricGroup = map[string]bool{
"geomean_over_time": true,
"hoeffding_bound_lower": true,
"hoeffding_bound_upper": true,
"first_over_time": true,
"last_over_time": true,
}
func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
@@ -445,15 +452,15 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu
dstValues = decimal.ExtendFloat64sCapacity(dstValues, len(rc.Timestamps))
scrapeInterval := getScrapeInterval(timestamps)
if *maxStalenessInterval > 0 {
if si := maxStalenessInterval.Milliseconds(); scrapeInterval > si {
scrapeInterval = si
}
}
maxPrevInterval := getMaxPrevInterval(scrapeInterval)
if rc.LookbackDelta > 0 && maxPrevInterval > rc.LookbackDelta {
maxPrevInterval = rc.LookbackDelta
}
if *minStalenessInterval > 0 {
if msi := minStalenessInterval.Milliseconds(); msi > 0 && maxPrevInterval < msi {
maxPrevInterval = msi
}
}
window := rc.Window
if window <= 0 {
window = rc.Step
@@ -1507,6 +1514,19 @@ func rollupLow(rfa *rollupFuncArg) float64 {
return min
}
func rollupTimestamp(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
timestamps := rfa.timestamps
if len(timestamps) == 0 {
// Do not take into account rfa.prevTimestamp, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
return float64(timestamps[len(timestamps)-1]) / 1e3
}
func rollupFirst(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.

View File

@@ -53,9 +53,9 @@ var transformFuncs = map[string]transformFunc{
"sort_desc": newTransformFuncSort(true),
"sqrt": newTransformFuncOneArg(transformSqrt),
"time": transformTime,
"timestamp": transformTimestamp,
"vector": transformVector,
"year": newTransformFuncDateTime(transformYear),
// "timestamp" has been moved to rollup funcs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
"vector": transformVector,
"year": newTransformFuncDateTime(transformYear),
// New funcs
"label_set": transformLabelSet,
@@ -1516,25 +1516,6 @@ func transformTime(tfa *transformFuncArg) ([]*timeseries, error) {
return evalTime(tfa.ec), nil
}
func transformTimestamp(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
rvs := args[0]
for _, ts := range rvs {
ts.MetricName.ResetMetricGroup()
values := ts.Values
for i, t := range ts.Timestamps {
v := values[i]
if !math.IsNaN(v) {
values[i] = float64(t) / 1e3
}
}
}
return rvs, nil
}
func transformVector(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
@@ -119,6 +120,14 @@ func SearchTagEntries(maxTagKeys, maxTagValues int) ([]storage.TagEntry, error)
return tagEntries, err
}
// GetTSDBStatusForDate returns TSDB status for the given date.
func GetTSDBStatusForDate(date uint64, topN int) (*storage.TSDBStatus, error) {
WG.Add(1)
status, err := Storage.GetTSDBStatusForDate(date, topN)
WG.Done()
return status, err
}
// GetSeriesCount returns the number of time series in the storage.
func GetSeriesCount() (uint64, error) {
WG.Add(1)
@@ -247,6 +256,10 @@ func registerStorageMetrics() {
return &sm.IndexDBMetrics
}
metrics.NewGauge(fmt.Sprintf(`vm_free_disk_space_bytes{path=%q}`, *DataPath), func() float64 {
return float64(fs.MustGetFreeSpace(*DataPath))
})
metrics.NewGauge(`vm_active_merges{type="storage/big"}`, func() float64 {
return float64(tm().ActiveBigMerges)
})

View File

@@ -14,7 +14,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.5.2"
"version": "6.7.1"
},
{
"type": "panel",
@@ -60,12 +60,12 @@
}
]
},
"description": "Overview for single node VictoriaMetrics v1.32.8 or higher",
"description": "Overview for single node VictoriaMetrics v1.34.0 or higher",
"editable": true,
"gnetId": 10229,
"graphTooltip": 0,
"id": null,
"iteration": 1580634216692,
"iteration": 1585340890561,
"links": [
{
"icon": "doc",
@@ -127,7 +127,6 @@
}
],
"mode": "html",
"options": {},
"timeFrom": null,
"timeShift": null,
"title": "Version",
@@ -146,7 +145,6 @@
},
"id": 4,
"links": [],
"options": {},
"pageSize": null,
"scroll": true,
"showHeader": true,
@@ -157,6 +155,7 @@
"styles": [
{
"alias": "",
"align": "auto",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
@@ -171,6 +170,7 @@
},
{
"alias": "",
"align": "auto",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
@@ -187,6 +187,7 @@
},
{
"alias": "",
"align": "auto",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
@@ -196,7 +197,7 @@
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": ".*",
"pattern": "/.*/",
"thresholds": [],
"type": "hidden",
"unit": "short"
@@ -259,7 +260,6 @@
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
@@ -344,7 +344,6 @@
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
@@ -428,7 +427,6 @@
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
@@ -2878,7 +2876,7 @@
}
],
"refresh": "30s",
"schemaVersion": 21,
"schemaVersion": 22,
"style": "dark",
"tags": [],
"templating": {
@@ -2890,6 +2888,7 @@
"definition": "label_values(vm_app_version, job)",
"hide": 0,
"includeAll": false,
"index": -1,
"label": null,
"multi": false,
"name": "job",
@@ -2912,6 +2911,7 @@
"definition": "label_values(vm_app_version{job=\"$job\"}, version)",
"hide": 2,
"includeAll": false,
"index": -1,
"label": null,
"multi": false,
"name": "version",
@@ -2961,5 +2961,8 @@
"timezone": "",
"title": "VictoriaMetrics",
"uid": "wNf0q_kZk",
"version": 2
"variables": {
"list": []
},
"version": 1
}

View File

@@ -1,18 +1,27 @@
# All these commands must run from repository root.
DOCKER_NAMESPACE := docker.io/victoriametrics
BUILDER_IMAGE := local/builder:go1.14.1
CERTS_IMAGE := local/certs:1.0.3
DOCKER_NAMESPACE := victoriametrics
package-certs:
(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(CERTS_IMAGE)$$') \
|| docker build -t $(CERTS_IMAGE) deployment/docker/certs
ROOT_IMAGE ?= scratch
CERTS_IMAGE := alpine:3.11
GO_BUILDER_IMAGE := golang:1.14.2
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr : _)
BASE_IMAGE := local/base:1.1.1-$(shell echo $(ROOT_IMAGE) | tr : _)-$(shell echo $(CERTS_IMAGE) | tr : _)
package-base:
(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BASE_IMAGE)$$') \
|| docker build -t $(BASE_IMAGE) \
--build-arg root_image=$(ROOT_IMAGE) \
--build-arg certs_image=$(CERTS_IMAGE) \
deployment/docker/base
package-builder:
(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BUILDER_IMAGE)$$') \
|| docker build -t $(BUILDER_IMAGE) deployment/docker/builder
|| docker build -t $(BUILDER_IMAGE) \
--build-arg go_builder_image=$(GO_BUILDER_IMAGE) \
deployment/docker/builder
app-via-docker: package-certs package-builder
app-via-docker: package-base package-builder
mkdir -p gocache-for-docker
docker run --rm \
--user $(shell id -u):$(shell id -g) \
@@ -31,7 +40,7 @@ package-via-docker:
$(MAKE) app-via-docker && \
docker build \
--build-arg src_binary=$(APP_NAME)$(APP_SUFFIX)-prod \
--build-arg certs_image=$(CERTS_IMAGE) \
--build-arg base_image=$(BASE_IMAGE) \
-t $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(APP_SUFFIX)$(RACE) \
-f app/$(APP_NAME)/deployment/Dockerfile bin)

View File

@@ -0,0 +1,10 @@
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
ARG certs_image
ARG root_image
FROM $certs_image as certs
RUN apk --update --no-cache add ca-certificates
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt

View File

@@ -1,2 +1,3 @@
FROM golang:1.14.1
ARG go_builder_image
FROM $go_builder_image
STOPSIGNAL SIGINT

View File

@@ -1,3 +0,0 @@
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
FROM alpine:3.10 as certs
RUN apk --update add ca-certificates

View File

@@ -2,7 +2,7 @@ version: '3.5'
services:
prometheus:
container_name: prometheus
image: prom/prometheus:v2.15.2
image: prom/prometheus:v2.17.2
depends_on:
- "victoriametrics"
ports:
@@ -35,7 +35,7 @@ services:
restart: always
grafana:
container_name: grafana
image: grafana/grafana:6.5.2
image: grafana/grafana:6.7.2
entrypoint: >
/bin/sh -c "
cd /var/lib/grafana &&

View File

@@ -1,5 +1,8 @@
# Articles
## Our articles
* [Open-sourcing VictoriaMetrics](https://medium.com/@valyala/open-sourcing-victoriametrics-f31e34485c2b)
* [How we created VictoriaMetrics](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac)
* [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 40K unique time series](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
@@ -21,5 +24,11 @@
* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
* [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
## Third-party articles
* [Better Prometheus rate() function with VictoriaMetrics](https://www.percona.com/blog/2020/02/28/better-prometheus-rate-function-with-victoriametrics/)
* [Disk usage: VictoriaMetrics vs Prometheus](https://stas.starikevich.com/posts/disk-usage-for-vm-versus-prometheus/)
* [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
* [What are Open Source Time Series Databases?](https://www.iunera.com/kraken/fabric/time-series-database/)

View File

@@ -1,16 +1,16 @@
## Case studies and talks
# Case studies and talks
Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
and feel free asking for references, reviews and additional case studies from real VictoriaMetrics users there.
### Adidas
## Adidas
See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk at [PromCon 2019](https://promcon.io/2019-munich/).
VictoriaMetrics is compared to Thanos, Corex and M3DB in the talk.
### COLOPL
## COLOPL
[COLOPL](http://www.colopl.co.jp/en/) is Japaneese Game Development company. It started using VictoriaMetrics
after evaulating the following remote storage solutions for Prometheus:
@@ -24,7 +24,7 @@ See [slides](https://speakerdeck.com/inletorder/monitoring-platform-with-victori
from `Large-scale, super-load system monitoring platform built with VictoriaMetrics` talk at [Prometheus Meetup Tokyo #3](https://prometheus.connpass.com/event/157721/).
### Wix.com
## Wix.com
[Wix.com](https://en.wikipedia.org/wiki/Wix.com) is the leading web development platform.
@@ -55,7 +55,7 @@ Numbers:
Alex Ulstein, Head of Monitoring, Wix.com
### Wedos.com
## Wedos.com
> [Wedos](https://www.wedos.com/) is the Biggest Czech Hosting. We have our own private data center, that holds only our servers and technologies. The second data center, where the servers will be cooled in an oil bath, is being built. We started using [cluster VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md) to store Prometheus metrics from all our infrastructure after receiving positive references from our friends who successfully use VictoriaMetrics.
@@ -68,7 +68,7 @@ Numbers:
> We like configuration simplicity and zero maintenance for VictoriaMetrics - once installed and forgot about it. It works out of the box without any issues.
### Synthesio
## Synthesio
[Synthesio](https://www.synthesio.com/) is the leading social intelligence tool for social media monitoring & social analytics.
@@ -85,11 +85,11 @@ Numbers:
- Churn rate - 100 new time series per hour
### MHI Vestas Offshore Wind
## MHI Vestas Offshore Wind
The mission of [MHI Vestas Offshore Wind](http://www.mhivestasoffshore.com) is to co-develop offshore wind as an economically viable and sustainable energy resource to benefit future generations.
MHI Vestas Offshore Wind is using VictoriaMetrics to ingest and visualize sensor data from offshore wind turbines. The very efficient storage and ability to backfill was key in chosing VictoriaMetrics. MHI Vestas Offshore Wind is running the cluster version of VictoriaMetrics on Kubernetes using the Helm charts for deployment to be able to scale up capacity as the solution will be rolled out. Current roll-out is
MHI Vestas Offshore Wind is using VictoriaMetrics to ingest and visualize sensor data from offshore wind turbines. The very efficient storage and ability to backfill was key in chosing VictoriaMetrics. MHI Vestas Offshore Wind is running the cluster version of VictoriaMetrics on Kubernetes using the Helm charts for deployment to be able to scale up capacity as the solution will be rolled out.
Numbers with current limited roll out:
@@ -100,7 +100,7 @@ Numbers with current limited roll out:
- Retention time: 3 years
### Dreamteam
## Dreamteam
[Dreamteam](https://dreamteam.gg/) successfully uses single-node VictoriaMetrics in multiple environments.
@@ -114,3 +114,151 @@ Numbers:
VictoriaMetrics in production environment runs on 2 M5 EC2 instances in "HA" mode, managed by Terraform and Ansible TF module.
2 Prometheus instances are writing to both VMs, with 2 [Promxy](https://github.com/jacksontj/promxy) replicas
as load balancer for reads.
## Brandwatch
[Brandwatch](https://www.brandwatch.com/) is the world's pioneering digital consumer intelligence suite,
helping over 2,000 of the world's most admired brands and agencies to make insightful, data-driven business decisions.
The engineering department at Brandwatch has been using InfluxDB for storing application metrics for many years
and when End-of-Life of InfluxDB version 1.x was announced we decided to re-evaluate our whole metrics collection and storage stack.
Main goals for the new metrics stack were:
- improved performance
- lower maintenance
- support for native clustering in open source version
- the less metrics shipment had to change, the better
- achieving longer data retention would be great but not critical
We initially looked at CrateDB and TimescaleDB which both turned out to have limitations or requirements in the open source versions
that made them unfit for our use case. Prometheus was also considered but push vs. pull metrics was a big change we did not want
to include in the already significant change.
Once we found VictoriaMetrics it solved the following problems:
- it is very light weight and we can now run virtual machines instead of dedicated hardware machines for metrics storage
- very short startup time and any possible gaps in data can easily be filled in by using Promxy
- we could continue using Telegraf as our metrics agent and ship identical metrics to both InfluxDB and VictoriaMetrics during a migration period (migration just about to start)
- compression is really good so we can store more metrics and we can spin up new VictoriaMetrics instances
for new data and keep read-only nodes with older data if we need to extend our retention period further
than single virtual machine disks allow and we can aggregate all the data from VictoriaMetrics with Promxy
High availability is done the same way we did with InfluxDB, by running parallel single nodes of VictoriaMetrics.
Numbers:
- active time series: up to 25 million
- ingestion rate: ~300 000
- total number of datapoints: 380 billion and growing
- total number of entries in inverted index: 575 million and growing
- daily time series churn rate: ~550 000
- data size on disk: ~660GB and growing
- index size on disk: ~9,3GB and growing
- average datapoint size on disk: ~1.75 bytes
Query rates are insignificant as we have concentrated on data ingestion so far.
Anders Bomberg, Monitoring and Infrastructure Team Lead, brandwatch.com
## Adsterra
[Adsterra Network](https://adsterra.com) is a leading digital advertising company that offers
performance-based solutions for advertisers and media partners worldwide.
We used to collect and store our metrics via Prometheus. Over time the amount of our servers
and metrics increased so we were gradually reducing the retention. When retention became 7 days
we started to look for alternative solutions. We were choosing among Thanos, VictoriaMetrics and Prometheus federation.
We end up with the following configuration:
- Local Prometheus'es with VictoriaMetrics as remote storage on our backend servers.
- A single Prometheus on our monitoring server scrapes metrics from other servers and writes to VictoriaMetrics.
- A separate Prometheus that federates from other Prometheus'es and processes alerts.
Turns out that remote write protocol generates too much traffic and connections. So after 8 months we started to look for alternatives.
Around the same time VictoriaMetrics released [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
We tried to scrape all the metrics via a single insance of vmagent. But that didn't work - vmgent wasn't able to catch up with writes
into VictoriaMetrics. We tested different options and end up with the following scheme:
- We removed Prometheus from our setup.
- VictoriaMetrics [can scrape targets](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-scrape-prometheus-exporters-such-as-node-exporter) as well,
so we removed vmagent. Now VictoriaMetrics scrapes all the metrics from 110 jobs and 5531 targets.
- We use [Promxy](https://github.com/jacksontj/promxy) for alerting.
Such a scheme has the following benefits comparing to Prometheus:
- We can store more metrics.
- We need less RAM and CPU for the same workload.
Cons are the following:
- VictoriaMetrics doesn't support replication - we run extra instance of VictoriaMetrics and Promxy in front of VictoriaMetrics pair for high availability.
- VictoriaMetrics stores 1 extra month for defined retention (if retention is set to N months, then VM stores N+1 months of data), but this is still better than other solutions.
Some numbers from our single-node VictoriaMetrics setup:
- active time series: 10M
- ingestion rate: 800K samples/sec
- total number of datapoints: more than 2 trillion
- total number of entries in inverted index: more than 1 billion
- daily time series churn rate: 2.6M
- data size on disk: 1.5 TB
- index size on disk: 27 GB
- average datapoint size on disk: 0.75 bytes
- range query rate: 16 rps
- instant query rate: 25 rps
- range query duration: max: 0.5s; median: 0.05s; 97th percentile: 0.29s
- instant query duration: max: 2.1s; median: 0.04s; 97th percentile: 0.15s
VictoriaMetrics consumes about 50GiB of RAM.
Setup:
We have 2 single-node instances of VictoriaMetircs. The first instance collects and stores high-resolution metrics (10s scrape interval) for a month.
The second instance collects and stores low-resolution metrics (300s scrape interval) for a month.
We use Promxy + Alertmanager for global view and alerts evaluation.
## ARNES
[The Academic and Research Network of Slovenia](https://www.arnes.si/en/) (ARNES) is a public institute that provides network services to research,
educational and cultural organizations, and enables them to establish connections and cooperation with each other and with related organizations abroad.
After using Cacti, Graphite and StatsD for years, we wanted to upgrade our monitoring stack to something that:
- has native alerting support
- can run on-prem
- has multi-dimension metrics
- lower hardware requirements
- is scalable
- simple client provisioning and discovery with Puppet
We were running Prometheus for about a year in a test environment and it worked great. But there was a need/wish for a few years of retention time,
like the old systems provided. We tested Thanos, which was a bit resource hungry back then, but it worked great for about half a year
until we discovered VictoriaMetrics. As our scale is not that big, we don't have on-prem S3 and no Kubernetes, VM's single node instance provided
the same result with less maintenance overhead and lower hardware requirements.
After testing it a few months and having great support from the maintainers on [Slack](http://slack.victoriametrics.com/),
we decided to go with it. VM's support for ingesting InfluxDB metrics was an additional bonus, since our hardware team uses
SNMPCollector to collect metrics from network devices and switching from InfluxDB to VictoriaMetrics was a simple change in the config file for them.
Numbers:
- 2 single node instances per DC (one for prometheus and one for influxdb metrics)
- Active time series per VictoriaMetrics instance: ~500k (prometheus) + ~320k (influxdb)
- Ingestion rate per VictoriaMetrics instance: 45k/s (prometheus) / 30k/s (influxdb)
- Query duration: median is ~5ms, 99th percentile is ~45ms
- Total number of datapoints per instance: 390B (prometheus), 110B (influxdb)
- Average datapoint size on drive: 0.4 bytes
- Disk usage per VictoriaMetrics instance: 125GB (prometheus), 185GB (influxdb)
- Index size per VictoriaMetrics instance: 1.6GB (prometheus), 1.2GB (influcdb)
We are running 1 Prometheus, 1 VictoriaMetrics and 1 Grafana server in each datacenter on baremetal servers, scraping 350+ targets
(and 3k+ devices collected via SNMPCollector sending metrics directly to VM). Each Prometheus is scraping all targets,
so we have all metrics in both VictoriaMetrics instances. We are using [Promxy](https://github.com/jacksontj/promxy) to deduplicate metrics from both instances.
Grafana has a LB infront, so if one DC has problems, we can still view all metrics from both DCs on the other Grafana instance.
We are still in the process of migration, but we are really happy with the whole stack. It has proven as an essential piece
for insight into our services during COVID-19 and has enabled us to provide better service and spot problems faster.

View File

@@ -1,3 +1,5 @@
<img alt="Victoria Metrics" src="logo.png">
# Cluster version
VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.
@@ -51,13 +53,6 @@ Docker images for cluster version are available here:
Source code for cluster version is available at [cluster branch](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
### Development Builds
1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make` from the repository root. It should build `vmstorage`, `vmselect`
and `vminsert` binaries and put them into the `bin` folder.
### Production builds
There is no need in installing Go on a host system since binaries are built
@@ -69,8 +64,7 @@ So [install docker](https://docs.docker.com/install/) and run the following comm
make vminsert-prod vmselect-prod vmstorage-prod
```
Production binaries are built into statically linked binaries for `GOARCH=amd64`, `GOOS=linux`.
They are put into `bin` folder with `-prod` suffixes:
Production binaries are built into statically linked binaries. They are put into `bin` folder with `-prod` suffixes:
```
$ make vminsert-prod vmselect-prod vmstorage-prod
$ ls -1 bin
@@ -79,6 +73,13 @@ vmselect-prod
vmstorage-prod
```
### Development Builds
1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make` from the repository root. It should build `vmstorage`, `vmselect`
and `vminsert` binaries and put them into the `bin` folder.
### Building docker images
Run `make package`. It will build the following docker images locally:
@@ -90,7 +91,12 @@ Run `make package`. It will build the following docker images locally:
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.
By default images are built on top of `scratch` image. It is possible to build on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds images on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package
```
## Operation
@@ -113,6 +119,15 @@ Ports may be altered by setting `-httpListenAddr` on the corresponding nodes.
It is recommended setting up [monitoring](#monitoring) for the cluster.
#### Environment variables
Each flag values can be set thru environment variables by following these rules:
- The `-envflag.enable` flag must be set
- Each `.` in flag names must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
- For repeating flags, an alternative syntax can be used by joining the different values into one using `,` as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
- It is possible setting prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`
### Monitoring
@@ -123,7 +138,8 @@ By default the following TCP ports are used:
- `vmstorage` - 8482
It is recommended setting up Prometheus to scrape `/metrics` pages from all the cluster components, so they can be monitored and analyzed
with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176).
with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11831).
### URL format

View File

@@ -54,11 +54,11 @@ Yes. Prometheus continues writing data to local storage after enabling remote st
and new data is available for querying via Prometheus as usual.
### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?
### How does VictoriaMetrics compare to other remote storage solutions for Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?
VictoriaMetrics is simpler, faster, more cost-effective and it provides [MetricsQL with useful extensions for PromQL](MetricsQL). The simplicity is twofold:
- It is simpler to configure and operate. There is no need in configuring third-party [sidecars](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md)
or fighting with [gossip protocol](https://github.com/thanos-io/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md).
or fighting with [gossip protocol](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md).
- VictoriaMetrics has simpler architecture, which means less bugs and more useful features in the long run comparing to competing TSDBs.
See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
@@ -74,7 +74,7 @@ VictoriaMetrics is similar to Cortex in the following aspects:
i.e. there is no need in running sidecars unlike in [Thanos](https://github.com/thanos-io/thanos) case.
- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format).
The main differences between Corex and VictoriaMetrics:
The main differences between Cortex and VictoriaMetrics:
- Cortex re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
- Cortex provides [Ruler](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ruler) and [Alertmanager](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#alertmanager) components,
which are currently missing in VictoriaMetrics. However, these components can be substituted by [Promxy](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
@@ -118,7 +118,7 @@ TimescaleDB insists on using SQL as a query language. While SQL is more powerful
Additionally, VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data.
### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex)?
### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex)?
No. VictoriaMetrics core is written in Go from scratch by [fasthttp](https://github.com/valyala/fasthttp) [author](https://github.com/valyala).
The architecture is [optimized for storing and querying large amounts of time series data with high cardinality](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac). VictoriaMetrics storage uses [certain ideas from ClickHouse](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). Special thanks to [Alexey Milovidov](https://github.com/alexey-milovidov).
@@ -129,6 +129,8 @@ The architecture is [optimized for storing and querying large amounts of time se
Yes:
* [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
* [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
* [Measuring vertical scalability for time series databases: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* [Measuring insert performance on high-cardinality time series: VictoriaMetrics vs InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
* [TSBS benchmark on high-cardinality time series: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)

View File

@@ -1,3 +1,13 @@
[![Latest Release](https://img.shields.io/github/release/VictoriaMetrics/VictoriaMetrics.svg?style=flat-square)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
[![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics.svg?maxAge=604800)](https://hub.docker.com/r/victoriametrics/victoria-metrics)
[![Slack](https://img.shields.io/badge/join%20slack-%23victoriametrics-brightgreen.svg)](http://slack.victoriametrics.com/)
[![GitHub license](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/workflows/main/badge.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg)](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
![Victoria Metrics logo](logo.png "Victoria Metrics")
## VictoriaMetrics
VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
@@ -16,6 +26,9 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* [Synthesio](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#synthesio)
* [MHI Vestas Offshore Wind](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#mhi-vestas-offshore-wind)
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
* [Brandwatch](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#brandwatch)
* [Adsterra](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adsterra)
* [ARNES](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#arnes)
## Prominent features
@@ -104,11 +117,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* [Backfilling](#backfilling)
* [Profiling](#profiling)
* [Integrations](#integrations)
* [Roadmap](#roadmap)
* [Third-party contributions](#third-party-contributions)
* [Contacts](#contacts)
* [Community and contributions](#community-and-contributions)
* [Third-party contributions](#third-party-contributions)
* [Reporting bugs](#reporting-bugs)
* [Roadmap](#roadmap)
* [Victoria Metrics Logo](#victoria-metrics-logo)
* [Logo Usage Guidelines](#logo-usage-guidelines)
* [Font used](#font-used)
@@ -137,6 +150,7 @@ Each flag values can be set thru environment variables by following these rules:
* The `-envflag.enable` flag must be set
* Each `.` in flag names must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
* For repeating flags, an alternative syntax can be used by joining the different values into one using `,` as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
* It is possible setting prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`
### Prometheus setup
@@ -194,6 +208,10 @@ Read more about tuning remote write for Prometheus [here](https://prometheus.io/
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
since the previous versions may have issues with `remote_write`.
Take a look also at [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md),
which can be used as faster and less resource-hungry alternative to Prometheus in certain cases.
### Grafana setup
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
@@ -241,6 +259,9 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
In the future other `*_sd_config` types will be supported.
@@ -258,7 +279,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag
and stream plain Influx line protocol data to the configured TCP and/or UDP addresses.
VictoriaMetrics maps Influx data using the following rules:
@@ -429,10 +451,10 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
* `<column_pos>` is the position of the CSV column (field). Column numbering starts from 1. The order of parsing rules may be arbitrary.
* `<type>` describes the column type. Supported types are:
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value. The metric name is read from the `<context>`.
CSV line must have at least a single metric field.
* `metric` - the corresponding CSV column at `<column_pos>` contains metric value, which must be integer or floating-point number.
The metric name is read from the `<context>`. CSV line must have at least a single metric field. Multiple metric fields per CSV line is OK.
* `label` - the corresponding CSV column at `<column_pos>` contains label value. The label name is read from the `<context>`.
CSV line may have arbitrary number of label fields. All these fields are attached to all the configured metrics.
CSV line may have arbitrary number of label fields. All these labels are attached to all the configured metrics.
* `time` - the corresponding CSV column at `<column_pos>` contains metric time. CSV line may contain either one or zero columns with time.
If CSV line has no time, then the current time is used. The time is applied to all the configured metrics.
The format of the time is configured via `<context>`. Supported time formats are:
@@ -442,7 +464,7 @@ The `format` query arg must contain comma-separated list of parsing rules for CS
* `rfc3339` - timestamp in [RFC3339](https://tools.ietf.org/html/rfc3339) format, i.e. `2006-01-02T15:04:05Z`.
* `custom:<layout>` - custom layout for the timestamp. The `<layout>` may contain arbitrary time layout according to [time.Parse rules in Go](https://golang.org/pkg/time/#Parse).
Each request to `/api/v1/import/csv` can contain arbitrary number of CSV lines.
Each request to `/api/v1/import/csv` may contain arbitrary number of CSV lines.
Example for importing CSV data via `/api/v1/import/csv`:
@@ -465,6 +487,8 @@ The following response should be returned:
{"metric":{"__name__":"ask","market":"NYSE","ticker":"GOOG"},"values":[1.23],"timestamps":[1583865146495]}
```
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
### Prometheus querying API usage
@@ -475,6 +499,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h
* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
* [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats)
These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
@@ -495,11 +520,11 @@ Additionally VictoriaMetrics provides the following handlers:
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) instead of building VictoriaMetrics
from sources. Building from sources is reasonable when developing additional features specific
to your needs.
to your needs or when testing bugfixes.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics` from the root folder of the repository.
It builds `victoria-metrics` binary and puts it into the `bin` folder.
@@ -515,7 +540,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
#### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics-arm` or `make victoria-metrics-arm64` from the root folder of the repository.
It builds `victoria-metrics-arm` or `victoria-metrics-arm64` binary respectively and puts it into the `bin` folder.
@@ -531,7 +556,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
This is an experimental mode, which may result in a lower compression ratio and slower decompression performance.
Use it with caution!
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make victoria-metrics-pure` from the root folder of the repository.
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
@@ -541,6 +566,13 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
```
### Start with docker-compose
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
@@ -585,11 +617,13 @@ Steps for restoring from a snapshot:
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
the deleted time series isn't freed instantly - it is freed during subsequent [background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics.
The `/api/v1/admin/tsdb/delete_series` handler may be protected with `authKey` if `-deleteAuthKey` command-line flag is set.
The delete API is intended mainly for the following cases:
* One-off deleting of accidentally written invalid (or undesired) time series.
@@ -597,10 +631,11 @@ The delete API is intended mainly for the following cases:
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
* Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
This can be done with relabeling in [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur.
* Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur when deleting too old data.
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
@@ -666,6 +701,8 @@ curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export
curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl.gz
```
Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for detail.
Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
and importing them concurrently. Note that the original file must be split on newlines.
@@ -820,6 +857,7 @@ Consider setting the following command-line flags:
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
* `-search.resetCacheAuthKey` for protecting `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
@@ -858,10 +896,9 @@ The most interesting metrics are:
* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
in the database.
* Sum of all the `vm_cache_size_bytes` metrics - the total size of all the caches in the database.
* `vm_allowed_memory_bytes` - the maximum allowed size for caches in the database. It is calculated as `system_memory * <-memory.allowedPercent> / 100`,
where `system_memory` is the amount of system memory and `-memory.allowedPercent` is the corresponding flag value.
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
* `sum(vm_data_size_bytes)` - the total data size on disk.
### Troubleshooting
@@ -877,7 +914,9 @@ The most interesting metrics are:
* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
It may slow down when there is no enough free space left. So make sure `-storageDataPath` directory
has at least 20% of free space comparing to disk size.
has at least 20% of free space comparing to disk size. The remaining amount of free space
can be [monitored](#monitoring) via `vm_free_disk_space_bytes` metric. The total size of data
stored on the disk can be monitored via sum of `vm_data_size_bytes` metrics.
* If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
@@ -888,9 +927,22 @@ The most interesting metrics are:
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag
to value close to the maximum interval between samples.
* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag.
It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
* Metrics and labels leading to high cardinality or high churn rate can be determined at `/api/v1/status/tsdb` page.
See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
while `topN` equals to 10.
### Backfilling
VictoriaMetrics accepts historical data in arbitrary order of time.
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
@@ -928,18 +980,16 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
See [these docs](https://github.com/netdata/netdata#integrations).
* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend.
See [this example](/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing single-node VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
* [Ansible role for installing cluster VictoriaMetrics](https://github.com/Slapper/ansible-victoriametrics-cluster-role).
## Roadmap
## Third-party contributions
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Contacts
@@ -972,17 +1022,21 @@ We are open to third-party pull requests provided they follow [KISS design princ
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
## Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Reporting bugs
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
## Roadmap
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
## Victoria Metrics Logo
[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).

BIN
docs/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

2
docs/robots.txt Normal file
View File

@@ -0,0 +1,2 @@
user-agent: *
allow: /

View File

@@ -1,7 +1,8 @@
## vmagent
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
or any other Prometheus-compatible storage system that supports `remote_write` protocol.
<img alt="vmagent" src="vmagent.png">
@@ -105,6 +106,14 @@ data among long-term remote storage, short-term remote storage and real-time ana
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
#### Prometheus remote_write proxy
`vmagent` may be used as a proxy for Prometheus data sent via Prometheus `remote_write` protocol. It can accept data via `remote_write` API
at `/api/v1/write` endpoint, apply relabeling and filtering and then proxy it to another `remote_write` systems.
The `vmagent` can be configured to encrypt the incoming `remote_write` requests with `-tls*` command-line flags.
Additionally, Basic Auth can be enabled for the incoming `remote_write` requests with `-httpAuth.*` command-line flags.
### How to collect metrics in Prometheus format
@@ -122,12 +131,21 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discover.
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
* `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
* `ec2_sd_configs` - for scraping targets in Amazone EC2.
See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
`vmagent` doesn't support `role_arn` config param yet.
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
`vmagent` provides the following additional functionality `gce_sd_config`:
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
* if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
* `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
The following service discovery mechanisms will be added to `vmagent` soon:
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
@@ -173,7 +191,7 @@ Read more about relabeling in the following articles:
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. This page also exports information on improperly configured scrape configs.
### Troubleshooting
@@ -181,11 +199,14 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
since `vmagent` establishes at least a single TCP connection per each target.
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
The directory can grow big when remote storage is unvailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
@@ -196,7 +217,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmagent` from the root folder of the repository.
It builds `vmagent` binary and puts it into the `bin` folder.
@@ -211,3 +232,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmagent
```

View File

@@ -140,14 +140,28 @@ Run `vmbackup -help` in order to see all the available options:
-dst string
Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
-envflag.enable
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-maxBytesPerSecond int
The maximum upload speed. There is no limit if it is set to 0
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
-origin string
Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
-snapshot.createURL string
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup.Example: http://victoriametrics:8428/snaphsot/create
-snapshot.deleteURL string
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshotCreateURL if not provided. All created snaphosts will be automatically deleted.Example: http://victoriametrics:8428/snaphsot/delete
-snapshotName string
Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
-storageDataPath string
@@ -164,7 +178,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmbackup` from the root folder of the repository.
It builds `vmbackup` binary and puts it into the `bin` folder.
@@ -179,3 +193,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmbackup
```

View File

@@ -47,12 +47,24 @@ Run `vmrestore -help` in order to see all the available options:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-envflag.enable
Whether to enable reading flags from environment variables additionally to command line. Command line flag values have priority over values from environment vars. Flags are read only from command line if this flag isn't set
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-maxBytesPerSecond int
The maximum download speed. There is no limit if it is set to 0
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
Allowed percent of system memory VictoriaMetrics caches may occupy. Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. Too high value may evict too much data from OS page cache, which will result in higher disk IO usage (default 60)
-skipBackupCompleteCheck
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
-src string
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-storageDataPath string
@@ -69,7 +81,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
2. Run `make vmrestore` from the root folder of the repository.
It builds `vmrestore` binary and puts it into the `bin` folder.
@@ -84,3 +96,10 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
```bash
ROOT_IMAGE=alpine:3.11 make package-vmrestore
```

29
go.mod
View File

@@ -1,27 +1,30 @@
module github.com/VictoriaMetrics/VictoriaMetrics
require (
cloud.google.com/go v0.54.0 // indirect
cloud.google.com/go v0.56.0 // indirect
cloud.google.com/go/storage v1.6.0
github.com/VictoriaMetrics/fastcache v1.5.7
github.com/VictoriaMetrics/metrics v1.11.2
github.com/aws/aws-sdk-go v1.29.22
github.com/aws/aws-sdk-go v1.30.13
github.com/cespare/xxhash/v2 v2.1.1
github.com/golang/protobuf v1.4.0 // indirect
github.com/golang/snappy v0.0.1
github.com/jmespath/go-jmespath v0.0.0-20200310193758-2437e8417af5 // indirect
github.com/klauspost/compress v1.10.3
github.com/valyala/fasthttp v1.9.0
github.com/valyala/fastjson v1.5.0
github.com/klauspost/compress v1.10.5
github.com/valyala/fasthttp v1.12.0
github.com/valyala/fastjson v1.5.1
github.com/valyala/fastrand v1.0.0
github.com/valyala/gozstd v1.6.4
github.com/valyala/gozstd v1.7.0
github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.4.1
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527
golang.org/x/tools v0.0.0-20200312153518-5e2df02acb1e // indirect
google.golang.org/api v0.20.0
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672 // indirect
google.golang.org/grpc v1.28.0 // indirect
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd // indirect
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f
golang.org/x/tools v0.0.0-20200423205358-59e73619c742 // indirect
google.golang.org/api v0.22.0
google.golang.org/appengine v1.6.6 // indirect
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215 // indirect
google.golang.org/grpc v1.29.1 // indirect
gopkg.in/yaml.v2 v2.2.8
)
go 1.12
go 1.13

87
go.sum
View File

@@ -9,8 +9,8 @@ cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6T
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
cloud.google.com/go v0.53.0 h1:MZQCQQaRwOrAcuKjiHWHrgKykt4fZyuwF2dtiG3fGW8=
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
cloud.google.com/go v0.54.0 h1:3ithwDMr7/3vpAMXiH+ZQnYbuIsh+OPhUPMFC9enmn0=
cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
cloud.google.com/go v0.56.0 h1:WRz29PgAsVEyPSDHyk+0fpEkwEFyfhHn+JbksT6gIL4=
cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
@@ -40,8 +40,8 @@ github.com/VictoriaMetrics/metrics v1.11.2 h1:t/ceLP6SvagUqypCKU7cI7+tQn54+TIV/t
github.com/VictoriaMetrics/metrics v1.11.2/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
github.com/aws/aws-sdk-go v1.29.22 h1:3WmsCj3C30l6/4f50mPkDZoTPWSvaRCjcVJOWdCJoIE=
github.com/aws/aws-sdk-go v1.29.22/go.mod h1:1KvfttTE3SPKMpo8g2c6jL3ZKfXtFvKscTgahTma5Xg=
github.com/aws/aws-sdk-go v1.30.13 h1:fBDYaJzInlOHpoKFaTEze5MvZ/pw7mhYkzDE8HAmD74=
github.com/aws/aws-sdk-go v1.30.13/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -72,15 +72,21 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.4 h1:87PNWwrRvUSnqS4dlcBU/ftvOIBep4sYuBLlh6rX2wk=
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@@ -105,22 +111,20 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.0.0-20200310193758-2437e8417af5 h1:uHQ3zN9bw90YSNUeGWrboinB/fjXSDA7dNSg7Dznw18=
github.com/jmespath/go-jmespath v0.0.0-20200310193758-2437e8417af5/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1 h1:6QPYqodiu3GuPL+7mfx+NwDdp2eTkp9IfEUpgAwUN0o=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8=
github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.4 h1:jFzIFaf586tquEB5EhzQG0HwGNSlgAJpG53G6Ss11wc=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc=
github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@@ -140,19 +144,21 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s=
github.com/valyala/fasthttp v1.9.0 h1:hNpmUdy/+ZXYpGy0OBfm7K0UQTzb73W0T0U4iJIVrMw=
github.com/valyala/fasthttp v1.9.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
github.com/valyala/fastjson v1.5.0 h1:DGrb4wEYso2HdGLyLmNoyNCQnCWfjd8yhghPv5/5YQg=
github.com/valyala/fastjson v1.5.0/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
github.com/valyala/fasthttp v1.12.0 h1:TsB9qkSeiMXB40ELWWSRMjlsE+8IkqXHcs01y2d9aw0=
github.com/valyala/fasthttp v1.12.0/go.mod h1:229t1eWu9UXTPmoUkbpN/fctKPBY4IJoFXQnxHGXy6E=
github.com/valyala/fastjson v1.5.1 h1:SXaQZVSwLjZOVhDEhjiCcDtnX0Feu7Z7A1+C5atpoHM=
github.com/valyala/fastjson v1.5.1/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/gozstd v1.6.4 h1:nFLddjEf90SFl5cVWyElSHozQDsbvLljPK703/skBS0=
github.com/valyala/gozstd v1.6.4/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
github.com/valyala/gozstd v1.7.0 h1:Ljh5c9zboqLhwTI33al32R72iCZfn0mCbVGcFWbGwRQ=
github.com/valyala/gozstd v1.7.0/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
github.com/valyala/quicktemplate v1.4.1 h1:tEtkSN6mTCJlYVT7As5x4wjtkk2hj2thsb0M+AcAVeM=
github.com/valyala/quicktemplate v1.4.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2 h1:75k/FF0Q2YM8QYo07VPddOLBslDt1MZOdEslOHvmzAs=
@@ -211,7 +217,6 @@ golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
@@ -219,8 +224,10 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd h1:QPwSajcTUrFriMF1nJ3XzgoqakqQEsnZf9LdXdi2nkI=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -234,6 +241,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a h1:WXEvlFVvvGxCJLG6REjsT03iWnKLEWinaScsxF2Vm2o=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -253,8 +262,10 @@ golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 h1:sfkvUWPNGwSV+8/fNqctR5lS2
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 h1:uYVVQ9WP/Ds2ROhcaGPeIdVq0RIXVLwsHlnvJ+cT1So=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -292,9 +303,9 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200312153518-5e2df02acb1e h1:bQaoCUKSo4FqzkHQQxNMOY9NXAZm/8d11bD7PGEzOfE=
golang.org/x/tools v0.0.0-20200312153518-5e2df02acb1e/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
golang.org/x/tools v0.0.0-20200423205358-59e73619c742 h1:9OGWpORUXvk8AsaBJlpzzDx7Srv/rSK6rvjcsJq4rJo=
golang.org/x/tools v0.0.0-20200423205358-59e73619c742/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
@@ -312,12 +323,16 @@ google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.20.0 h1:jz2KixHX7EcCPiQrySzPdnYT7DbINAypCqKZ1Z7GM40=
google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.22.0 h1:J1Pl9P2lnmYFSJvgs70DKELqHNh8CNWXPbud4njEE2s=
google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc=
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
@@ -336,9 +351,9 @@ google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce h1:1mbrb1tUU+Zmt5C94IGKADBTJZjZXAd+BubWi7r9EiI=
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672 h1:jiDSspVssiikoRPFHT6pYrL+CL6/yIc3b9AuHO/4xik=
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215 h1:0Uz5jLJQioKgVozXa1gzGbzYxbb/rhQEVvSWxzw5oUs=
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
@@ -351,6 +366,14 @@ google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0 h1:bO/TA4OxCOummhSf10siHuG7vJOiwh7SpRpFZDkOgl4=
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
google.golang.org/grpc v1.29.1 h1:EC2SB8S04d2r73uptxphDSUG+kTKVgjRPF+N3xpxRB4=
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zimw=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@@ -164,7 +164,7 @@ func TestMaxUpExponent(t *testing.T) {
e := maxUpExponent(v)
if e != eExpected {
t.Fatalf("unexpected e for v=%d; got %d; epxecting %d", v, e, eExpected)
t.Fatalf("unexpected e for v=%d; got %d; expecting %d", v, e, eExpected)
}
e = maxUpExponent(-v)
if e != eExpected {

View File

@@ -70,9 +70,9 @@ func TestMarshalInt64ArraySize(t *testing.T) {
v += 30e3 + int64(rand.NormFloat64()*1e3)
}
testMarshalInt64ArraySize(t, va, 1, 500, 1300)
testMarshalInt64ArraySize(t, va, 2, 500, 1400)
testMarshalInt64ArraySize(t, va, 3, 800, 1800)
testMarshalInt64ArraySize(t, va, 1, 180, 1400)
testMarshalInt64ArraySize(t, va, 2, 250, 1550)
testMarshalInt64ArraySize(t, va, 3, 600, 1800)
testMarshalInt64ArraySize(t, va, 4, 1300, 2100)
testMarshalInt64ArraySize(t, va, 5, 2000, 3200)
testMarshalInt64ArraySize(t, va, 6, 3000, 4800)

View File

@@ -214,6 +214,6 @@ func testMarshalInt64ArraySize(t *testing.T, va []int64, precisionBits uint8, mi
t.Fatalf("too big size for marshaled %d items with precisionBits %d: got %d; expecting %d", len(va), precisionBits, len(b), maxSizeExpected)
}
if len(b) < minSizeExpected {
t.Fatalf("too small size for marshaled %d items with precisionBits %d: got %d; epxecting %d", len(va), precisionBits, len(b), minSizeExpected)
t.Fatalf("too small size for marshaled %d items with precisionBits %d: got %d; expecting %d", len(va), precisionBits, len(b), minSizeExpected)
}
}

View File

@@ -7,9 +7,12 @@ import (
"strings"
)
var enable = flag.Bool("envflag.enable", false, "Whether to enable reading flags from environment variables additionally to command line. "+
"Command line flag values have priority over values from environment vars. "+
"Flags are read only from command line if this flag isn't set")
var (
enable = flag.Bool("envflag.enable", false, "Whether to enable reading flags from environment variables additionally to command line. "+
"Command line flag values have priority over values from environment vars. "+
"Flags are read only from command line if this flag isn't set")
prefix = flag.String("envflag.prefix", "", "Prefix for environment variables if -envflag.enable is set")
)
// Parse parses environment vars and command-line flags.
//
@@ -48,5 +51,6 @@ func Parse() {
func getEnvFlagName(s string) string {
// Substitute dots with underscores, since env var names cannot contain dots.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/311#issuecomment-586354129 for details.
return strings.ReplaceAll(s, ".", "_")
s = strings.ReplaceAll(s, ".", "_")
return *prefix + s
}

View File

@@ -5,7 +5,7 @@ import (
"strings"
)
// NewArray returns new Array with the given name and descprition.
// NewArray returns new Array with the given name and description.
func NewArray(name, description string) *Array {
var a Array
flag.Var(&a, name, description)

View File

@@ -1,10 +0,0 @@
package fs
import (
"os"
)
func fadviseSequentialRead(f *os.File, prefetch bool) error {
// TODO: implement this properly
return nil
}

View File

@@ -1,22 +0,0 @@
// +build linux freebsd
package fs
import (
"fmt"
"os"
"golang.org/x/sys/unix"
)
func fadviseSequentialRead(f *os.File, prefetch bool) error {
fd := int(f.Fd())
mode := unix.FADV_SEQUENTIAL
if prefetch {
mode |= unix.FADV_WILLNEED
}
if err := unix.Fadvise(int(fd), 0, 0, mode); err != nil {
return fmt.Errorf("error returned from unix.Fadvise(%d): %s", mode, err)
}
return nil
}

View File

@@ -67,15 +67,6 @@ func (r *ReaderAt) MustClose() {
readersCount.Dec()
}
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
//
// if prefetch is set, then the OS is hinted to prefetch f data.
func (r *ReaderAt) MustFadviseSequentialRead(prefetch bool) {
if err := fadviseSequentialRead(r.f, prefetch); err != nil {
logger.Panicf("FATAL: error in fadviseSequentialRead(%q, %v): %s", r.f.Name(), prefetch, err)
}
}
// OpenReaderAt opens ReaderAt for reading from filename.
//
// MustClose must be called on the returned ReaderAt when it is no longer needed.
@@ -94,7 +85,6 @@ func OpenReaderAt(path string) (*ReaderAt, error) {
}
r.mmapData = data
}
r.MustFadviseSequentialRead(false)
readersCount.Inc()
return &r, nil
}

View File

@@ -32,7 +32,9 @@ var (
metricsAuthKey = flag.String("metricsAuthKey", "", "Auth key for /metrics. It overrides httpAuth settings")
pprofAuthKey = flag.String("pprofAuthKey", "", "Auth key for /debug/pprof. It overrides httpAuth settings")
disableResponseCompression = flag.Bool("http.disableResponseCompression", false, "Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth")
disableResponseCompression = flag.Bool("http.disableResponseCompression", false, "Disable compression of HTTP responses for saving CPU resources. By default compression is enabled to save network bandwidth")
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, "The maximum duration for graceful shutdown of HTTP server. "+
"Highly loaded server may require increased value for graceful shutdown")
)
var (
@@ -130,25 +132,24 @@ func Stop(addr string) error {
if s == nil {
logger.Panicf("BUG: there is no http server at %q", addr)
}
ctx, cancelFunc := context.WithTimeout(context.Background(), 5*time.Second)
ctx, cancelFunc := context.WithTimeout(context.Background(), *maxGracefulShutdownDuration)
defer cancelFunc()
if err := s.Shutdown(ctx); err != nil {
return fmt.Errorf("cannot gracefully shutdown http server at %q: %s", addr, err)
return fmt.Errorf("cannot gracefully shutdown http server at %q in %.3fs: %s", addr, maxGracefulShutdownDuration.Seconds(), err)
}
return nil
}
func gzipHandler(rh RequestHandler) http.HandlerFunc {
hf := func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
w = maybeGzipResponseWriter(w, r)
handlerWrapper(w, r, rh)
if zrw, ok := w.(*gzipResponseWriter); ok {
if err := zrw.Close(); err != nil && !isTrivialNetworkError(err) {
logger.Errorf("gzipResponseWriter.Close: %s", err)
logger.Warnf("gzipResponseWriter.Close: %s", err)
}
}
}
return http.HandlerFunc(hf)
}
var metricsHandlerDuration = metrics.NewHistogram(`vm_http_request_duration_seconds{path="/metrics"}`)
@@ -328,10 +329,10 @@ func (zrw *gzipResponseWriter) WriteHeader(statusCode int) {
// Implements http.Flusher
func (zrw *gzipResponseWriter) Flush() {
if err := zrw.bw.Flush(); err != nil && !isTrivialNetworkError(err) {
logger.Errorf("gzipResponseWriter.Flush (buffer): %s", err)
logger.Warnf("gzipResponseWriter.Flush (buffer): %s", err)
}
if err := zrw.zw.Flush(); err != nil && !isTrivialNetworkError(err) {
logger.Errorf("gzipResponseWriter.Flush (gzip): %s", err)
logger.Warnf("gzipResponseWriter.Flush (gzip): %s", err)
}
if fw, ok := zrw.ResponseWriter.(http.Flusher); ok {
fw.Flush()
@@ -418,7 +419,7 @@ var (
// Errorf writes formatted error message to w and to logger.
func Errorf(w http.ResponseWriter, format string, args ...interface{}) {
errStr := fmt.Sprintf(format, args...)
logger.ErrorfSkipframes(1, "%s", errStr)
logger.WarnfSkipframes(1, "%s", errStr)
// Extract statusCode from args
statusCode := http.StatusBadRequest
@@ -451,3 +452,8 @@ func isTrivialNetworkError(err error) bool {
}
return false
}
// IsTLS indicates is tls enabled or not
func IsTLS() bool {
return *tlsEnable
}

View File

@@ -18,7 +18,7 @@ import (
)
var (
loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC")
loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC")
loggerFormat = flag.String("loggerFormat", "default", "Format for logs. Possible values: default, json")
loggerOutput = flag.String("loggerOutput", "stderr", "Output for the logs. Supported values: stderr, stdout")
)
@@ -51,10 +51,10 @@ var output io.Writer = os.Stderr
func validateLoggerLevel() {
switch *loggerLevel {
case "INFO", "ERROR", "FATAL", "PANIC":
case "INFO", "WARN", "ERROR", "FATAL", "PANIC":
default:
// We cannot use logger.Panicf here, since the logger isn't initialized yet.
panic(fmt.Errorf("FATAL: unsupported `-loggerLevel` value: %q; supported values are: INFO, ERROR, FATAL, PANIC", *loggerLevel))
panic(fmt.Errorf("FATAL: unsupported `-loggerLevel` value: %q; supported values are: INFO, WARN, ERROR, FATAL, PANIC", *loggerLevel))
}
}
@@ -79,11 +79,21 @@ func Infof(format string, args ...interface{}) {
logLevel("INFO", format, args...)
}
// Warnf logs warn message.
func Warnf(format string, args ...interface{}) {
logLevel("WARN", format, args...)
}
// Errorf logs error message.
func Errorf(format string, args ...interface{}) {
logLevel("ERROR", format, args...)
}
// WarnfSkipframes logs warn message and skips the given number of frames for the caller.
func WarnfSkipframes(skipframes int, format string, args ...interface{}) {
logLevelSkipframes(skipframes, "WARN", format, args...)
}
// ErrorfSkipframes logs error message and skips the given number of frames for the caller.
func ErrorfSkipframes(skipframes int, format string, args ...interface{}) {
logLevelSkipframes(skipframes, "ERROR", format, args...)
@@ -185,6 +195,13 @@ var mu sync.Mutex
func shouldSkipLog(level string) bool {
switch *loggerLevel {
case "WARN":
switch level {
case "WARN", "ERROR", "FATAL", "PANIC":
return false
default:
return true
}
case "ERROR":
switch level {
case "ERROR", "FATAL", "PANIC":

View File

@@ -8,7 +8,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy")
var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy. "+
"Too low value may increase cache miss rate, which usually results in higher CPU and disk IO usage. "+
"Too high value may evict too much data from OS page cache, which will result in higher disk IO usage")
var (
allowedMemory int

View File

@@ -204,8 +204,6 @@ func (idxbc *indexBlockCache) MustClose() {
close(idxbc.cleanerStopCh)
idxbc.cleanerWG.Wait()
atomic.AddUint64(&indexBlockCacheRequests, idxbc.requests)
atomic.AddUint64(&indexBlockCacheMisses, idxbc.misses)
// It is safe returning idxbc.m to pool, since the Reset must be called
// when the idxbc entries are no longer accessed by concurrent goroutines.
for _, idxbe := range idxbc.m {
@@ -240,11 +238,6 @@ func (idxbc *indexBlockCache) cleanByTimeout() {
idxbc.mu.Unlock()
}
var (
indexBlockCacheRequests uint64
indexBlockCacheMisses uint64
)
func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
atomic.AddUint64(&idxbc.requests, 1)
idxbc.mu.RLock()
@@ -361,8 +354,6 @@ func (ibc *inmemoryBlockCache) MustClose() {
close(ibc.cleanerStopCh)
ibc.cleanerWG.Wait()
atomic.AddUint64(&inmemoryBlockCacheRequests, ibc.requests)
atomic.AddUint64(&inmemoryBlockCacheMisses, ibc.misses)
// It is safe returning ibc.m entries to pool, since the Reset function may be called
// only if no other goroutines access ibc entries.
for _, ibe := range ibc.m {
@@ -397,11 +388,6 @@ func (ibc *inmemoryBlockCache) cleanByTimeout() {
ibc.mu.Unlock()
}
var (
inmemoryBlockCacheRequests uint64
inmemoryBlockCacheMisses uint64
)
func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
atomic.AddUint64(&ibc.requests, 1)

View File

@@ -19,6 +19,16 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
)
// These are global counters for cache requests and misses for parts
// which were already merged into another parts.
var (
historicalDataBlockCacheRequests uint64
historicalDataBlockCacheMisses uint64
historicalIndexBlockCacheRequests uint64
historicalIndexBlockCacheMisses uint64
)
// maxParts is the maximum number of parts in the table.
//
// This number may be reached when the insertion pace outreaches merger pace.
@@ -326,11 +336,11 @@ func (tb *Table) UpdateMetrics(m *TableMetrics) {
}
tb.partsLock.Unlock()
atomic.AddUint64(&m.DataBlocksCacheRequests, atomic.LoadUint64(&inmemoryBlockCacheRequests))
atomic.AddUint64(&m.DataBlocksCacheMisses, atomic.LoadUint64(&inmemoryBlockCacheMisses))
m.DataBlocksCacheRequests += atomic.LoadUint64(&historicalDataBlockCacheRequests)
m.DataBlocksCacheMisses += atomic.LoadUint64(&historicalDataBlockCacheMisses)
atomic.AddUint64(&m.IndexBlocksCacheRequests, atomic.LoadUint64(&indexBlockCacheRequests))
atomic.AddUint64(&m.IndexBlocksCacheMisses, atomic.LoadUint64(&indexBlockCacheMisses))
m.IndexBlocksCacheRequests += atomic.LoadUint64(&historicalIndexBlockCacheRequests)
m.IndexBlocksCacheMisses += atomic.LoadUint64(&historicalIndexBlockCacheMisses)
}
// AddItems adds the given items to the tb.
@@ -1300,6 +1310,10 @@ func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]bool) ([]*pa
dst := pws[:0]
for _, pw := range pws {
if partsToRemove[pw] {
atomic.AddUint64(&historicalDataBlockCacheRequests, pw.p.ibCache.Requests())
atomic.AddUint64(&historicalDataBlockCacheMisses, pw.p.ibCache.Misses())
atomic.AddUint64(&historicalIndexBlockCacheRequests, pw.p.idxbCache.Requests())
atomic.AddUint64(&historicalIndexBlockCacheMisses, pw.p.idxbCache.Misses())
removedParts++
continue
}

View File

@@ -57,6 +57,10 @@ var rollupFuncs = map[string]bool{
"aggr_over_time": true,
"hoeffding_bound_upper": true,
"hoeffding_bound_lower": true,
// `timestamp` func has been moved here because it must work properly with offsets and samples unaligned to the current step.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details.
"timestamp": true,
}
// IsRollupFunc returns whether funcName is known rollup function.

View File

@@ -32,9 +32,9 @@ var transformFuncs = map[string]bool{
"sort_desc": true,
"sqrt": true,
"time": true,
"timestamp": true,
"vector": true,
"year": true,
// "timestamp" has been moved to rollup funcs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415
"vector": true,
"year": true,
// New funcs from MetricsQL
"label_set": true,

135
lib/promauth/config.go Normal file
View File

@@ -0,0 +1,135 @@
package promauth
import (
"crypto/tls"
"crypto/x509"
"encoding/base64"
"fmt"
"io/ioutil"
)
// TLSConfig represents TLS config.
//
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config
type TLSConfig struct {
CAFile string `yaml:"ca_file"`
CertFile string `yaml:"cert_file"`
KeyFile string `yaml:"key_file"`
ServerName string `yaml:"server_name"`
InsecureSkipVerify bool `yaml:"insecure_skip_verify"`
}
// BasicAuthConfig represents basic auth config.
type BasicAuthConfig struct {
Username string `yaml:"username"`
Password string `yaml:"password"`
PasswordFile string `yaml:"password_file"`
}
// Config is auth config.
type Config struct {
// Optional `Authorization` header.
//
// It may contain `Basic ....` or `Bearer ....` string.
Authorization string
// Optional TLS config
TLSRootCA *x509.CertPool
TLSCertificate *tls.Certificate
TLSServerName string
TLSInsecureSkipVerify bool
}
// NewTLSConfig returns new TLS config for the given ac.
func (ac *Config) NewTLSConfig() *tls.Config {
tlsCfg := &tls.Config{
RootCAs: ac.TLSRootCA,
ClientSessionCache: tls.NewLRUClientSessionCache(0),
}
if ac.TLSCertificate != nil {
tlsCfg.Certificates = []tls.Certificate{*ac.TLSCertificate}
}
tlsCfg.ServerName = ac.TLSServerName
tlsCfg.InsecureSkipVerify = ac.TLSInsecureSkipVerify
return tlsCfg
}
// NewConfig creates auth config from the given args.
func NewConfig(baseDir string, basicAuth *BasicAuthConfig, bearerToken, bearerTokenFile string, tlsConfig *TLSConfig) (*Config, error) {
var authorization string
if basicAuth != nil {
if basicAuth.Username == "" {
return nil, fmt.Errorf("missing `username` in `basic_auth` section")
}
username := basicAuth.Username
password := basicAuth.Password
if basicAuth.PasswordFile != "" {
if basicAuth.Password != "" {
return nil, fmt.Errorf("both `password`=%q and `password_file`=%q are set in `basic_auth` section", basicAuth.Password, basicAuth.PasswordFile)
}
path := getFilepath(baseDir, basicAuth.PasswordFile)
pass, err := readPasswordFromFile(path)
if err != nil {
return nil, fmt.Errorf("cannot read password from `password_file`=%q set in `basic_auth` section: %s", basicAuth.PasswordFile, err)
}
password = pass
}
// See https://en.wikipedia.org/wiki/Basic_access_authentication
token := username + ":" + password
token64 := base64.StdEncoding.EncodeToString([]byte(token))
authorization = "Basic " + token64
}
if bearerTokenFile != "" {
if bearerToken != "" {
return nil, fmt.Errorf("both `bearer_token`=%q and `bearer_token_file`=%q are set", bearerToken, bearerTokenFile)
}
path := getFilepath(baseDir, bearerTokenFile)
token, err := readPasswordFromFile(path)
if err != nil {
return nil, fmt.Errorf("cannot read bearer token from `bearer_token_file`=%q: %s", bearerTokenFile, err)
}
bearerToken = token
}
if bearerToken != "" {
if authorization != "" {
return nil, fmt.Errorf("cannot use both `basic_auth` and `bearer_token`")
}
authorization = "Bearer " + bearerToken
}
var tlsRootCA *x509.CertPool
var tlsCertificate *tls.Certificate
tlsServerName := ""
tlsInsecureSkipVerify := false
if tlsConfig != nil {
tlsServerName = tlsConfig.ServerName
tlsInsecureSkipVerify = tlsConfig.InsecureSkipVerify
if tlsConfig.CertFile != "" || tlsConfig.KeyFile != "" {
certPath := getFilepath(baseDir, tlsConfig.CertFile)
keyPath := getFilepath(baseDir, tlsConfig.KeyFile)
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
if err != nil {
return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %s", tlsConfig.CertFile, tlsConfig.KeyFile, err)
}
tlsCertificate = &cert
}
if tlsConfig.CAFile != "" {
path := getFilepath(baseDir, tlsConfig.CAFile)
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("cannot read `ca_file` %q: %s", tlsConfig.CAFile, err)
}
tlsRootCA = x509.NewCertPool()
if !tlsRootCA.AppendCertsFromPEM(data) {
return nil, fmt.Errorf("cannot parse data from `ca_file` %q", tlsConfig.CAFile)
}
}
}
ac := &Config{
Authorization: authorization,
TLSRootCA: tlsRootCA,
TLSCertificate: tlsCertificate,
TLSServerName: tlsServerName,
TLSInsecureSkipVerify: tlsInsecureSkipVerify,
}
return ac, nil
}

24
lib/promauth/util.go Normal file
View File

@@ -0,0 +1,24 @@
package promauth
import (
"io/ioutil"
"path/filepath"
"strings"
"unicode"
)
func getFilepath(baseDir, path string) string {
if filepath.IsAbs(path) {
return path
}
return filepath.Join(baseDir, path)
}
func readPasswordFromFile(path string) (string, error) {
data, err := ioutil.ReadFile(path)
if err != nil {
return "", err
}
pass := strings.TrimRightFunc(string(data), unicode.IsSpace)
return pass, nil
}

View File

@@ -69,6 +69,20 @@ func removeEmptyLabels(labels []prompbmarshal.Label, labelsOffset int) []prompbm
return dst
}
// RemoveMetaLabels removes all the `__meta_` labels from src and puts the rest of labels to dst.
//
// See https://www.robustperception.io/life-of-a-label fo details.
func RemoveMetaLabels(dst, src []prompbmarshal.Label) []prompbmarshal.Label {
for i := range src {
label := &src[i]
if strings.HasPrefix(label.Name, "__meta_") {
continue
}
dst = append(dst, *label)
}
return dst
}
// FinalizeLabels finalizes labels according to relabel_config rules.
//
// It renames `__address__` to `instance` and removes labels with "__" in the beginning.
@@ -266,3 +280,14 @@ func GetLabelByName(labels []prompbmarshal.Label, name string) *prompbmarshal.La
}
return nil
}
// GetLabelValueByName returns value for label with the given name from labels.
//
// It returns empty string for non-existing label.
func GetLabelValueByName(labels []prompbmarshal.Label, name string) string {
label := GetLabelByName(labels, name)
if label == nil {
return ""
}
return label.Value
}

View File

@@ -628,3 +628,50 @@ func TestFinalizeLabels(t *testing.T) {
},
})
}
func TestRemoveMetaLabels(t *testing.T) {
f := func(labels, resultExpected []prompbmarshal.Label) {
t.Helper()
result := RemoveMetaLabels(nil, labels)
if !reflect.DeepEqual(result, resultExpected) {
t.Fatalf("unexpected result of RemoveMetaLabels;\ngot\n%v\nwant\n%v", result, resultExpected)
}
}
f(nil, nil)
f([]prompbmarshal.Label{
{
Name: "foo",
Value: "bar",
},
}, []prompbmarshal.Label{
{
Name: "foo",
Value: "bar",
},
})
f([]prompbmarshal.Label{
{
Name: "__meta_foo",
Value: "bar",
},
}, nil)
f([]prompbmarshal.Label{
{
Name: "__meta_foo",
Value: "bdffr",
},
{
Name: "foo",
Value: "bar",
},
{
Name: "__meta_xxx",
Value: "basd",
},
}, []prompbmarshal.Label{
{
Name: "foo",
Value: "bar",
},
})
}

View File

@@ -36,7 +36,7 @@ func newClient(sw *ScrapeWork) *client {
isTLS := string(u.Scheme()) == "https"
var tlsCfg *tls.Config
if isTLS {
tlsCfg = getTLSConfig(sw)
tlsCfg = sw.AuthConfig.NewTLSConfig()
}
if !strings.Contains(host, ":") {
if !isTLS {
@@ -64,7 +64,7 @@ func newClient(sw *ScrapeWork) *client {
scrapeURL: sw.ScrapeURL,
host: host,
requestURI: requestURI,
authHeader: sw.Authorization,
authHeader: sw.AuthConfig.Authorization,
}
}
@@ -79,8 +79,8 @@ func (c *client) ReadData(dst []byte) ([]byte, error) {
req.Header.Set("Authorization", c.authHeader)
}
resp := fasthttp.AcquireResponse()
// There is no need in calling DoTimeout, since the timeout is already set in c.hc.ReadTimeout.
err := c.hc.Do(req, resp)
err := doRequestWithPossibleRetry(c.hc, req, resp)
fasthttp.ReleaseRequest(req)
if err != nil {
fasthttp.ReleaseResponse(resp)
@@ -121,15 +121,15 @@ var (
scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
)
func getTLSConfig(sw *ScrapeWork) *tls.Config {
tlsCfg := &tls.Config{
RootCAs: sw.TLSRootCA,
ClientSessionCache: tls.NewLRUClientSessionCache(0),
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
err := hc.Do(req, resp)
if err == nil {
return nil
}
if sw.TLSCertificate != nil {
tlsCfg.Certificates = []tls.Certificate{*sw.TLSCertificate}
if err != fasthttp.ErrConnectionClosed {
return err
}
tlsCfg.ServerName = sw.TLSServerName
tlsCfg.InsecureSkipVerify = sw.TLSInsecureSkipVerify
return tlsCfg
// Retry request if the server closed the keep-alive connection during the first attempt.
return hc.Do(req, resp)
}

Some files were not shown because too many files have changed in this diff Show More