Compare commits

...

190 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
0b1e877a7d docs/Single-server-VictoriaMetrics.md: sync with README.md 2020-03-03 21:39:05 +02:00
Aliaksandr Valialkin
0ba8ee6022 README.md: mention -search.cacheTimestampOffset in Backfilling section 2020-03-03 21:38:39 +02:00
Aliaksandr Valialkin
9a944fd169 lib/promscrape: consistency renaming: stopCh -> globalStopCh 2020-03-03 20:08:08 +02:00
Aliaksandr Valialkin
032c88561b app/vminsert/prompush: limit memory usage by pushing promscrape data in smaller blocks 2020-03-03 19:58:54 +02:00
Aliaksandr Valialkin
76036c1897 app/vmagent: add -remoteWrite.maxDiskUsagePerURL for limiting the maximum disk usage for each -remoteWrite.url buffer
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/352
2020-03-03 19:49:07 +02:00
Aliaksandr Valialkin
c31d640eb9 app/vmagent/remotewrite: do not reset empty relabelCtx 2020-03-03 15:01:03 +02:00
Aliaksandr Valialkin
02b55c72dc app/vmagent: add -remoteWrite.urlRelabelConfig for applying individual relabeling for each -remoteWrite.url
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/320
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/308
2020-03-03 13:12:16 +02:00
Aliaksandr Valialkin
1d7ab78b55 lib/protoparser/prometheus: allow trailing comma in tags list
The trailing comma is generated by cloudwatch exporter.
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/350
2020-03-02 22:22:09 +02:00
Aliaksandr Valialkin
7d178a40bd app/vmselect/prometheus: do not add __name__!= filter when searching for all the matching metric names via /api/v1/label/__name__/values with non-empty label filter
This should reduce query time.
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/343
2020-02-28 23:35:55 +02:00
Aliaksandr Valialkin
43754ff420 README.md: put https://gitlab.com/optima_public/prometheus_oauth_proxy in third-party contributions section 2020-02-28 21:23:34 +02:00
Aliaksandr Valialkin
b785429ddb lib/protoparser: metrics renaming: vm_protoparser_<type>_* -> vm_protoparser_*{type="<type>"}
This should improve composability of these metrics in PromQL queries
2020-02-28 20:20:10 +02:00
Aliaksandr Valialkin
f9a584b5c1 app/vmagent/remotewrite: yet another typo fix 2020-02-28 20:05:55 +02:00
Aliaksandr Valialkin
e22fdc1073 lib/persistentqueue: reset chunk file when the persistent queue is empty 2020-02-28 20:05:53 +02:00
Aliaksandr Valialkin
b9b46cb8dc app/vmagent/remotewrite: typo fix 2020-02-28 19:03:16 +02:00
Aliaksandr Valialkin
db6f4e4af1 app/vmagent/remotewrite: limit memory usage when big scrape blocks are pushed to remote storage 2020-02-28 18:58:01 +02:00
Aliaksandr Valialkin
8cc88db38d docs/Single-server-VictoriaMetrics.md: sync with README.md 2020-02-28 12:58:32 +02:00
Aliaksandr Valialkin
f3c28d2ae4 README.md: typo fix 2020-02-28 12:58:31 +02:00
Aliaksandr Valialkin
57528ca31c docs: add a doc for vmagent 2020-02-28 12:23:56 +02:00
Aliaksandr Valialkin
5701b2f7bb app/vmselect/prometheus: properly pass filter for labelName=__name__ in labelValuesWithMatches
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/343
2020-02-28 12:18:14 +02:00
Aliaksandr Valialkin
18af31a4c2 all: properly split vm_deduplicated_samples_total among cluster components
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/345
2020-02-27 23:48:07 +02:00
Aliaksandr Valialkin
6819db5686 lib/envflag: typo fix in docs to -envflag.enable: envoronment->environment 2020-02-27 21:47:58 +02:00
Aliaksandr Valialkin
63a88a619b deployment/docker: update Go builder from Go1.13.8 to Go1.14.0 2020-02-26 22:15:44 +02:00
Aliaksandr Valialkin
c458b521a2 app/vmagent: allow setting -httpListenAddr to empty string in order to disable listening for http requests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/340
2020-02-26 20:58:11 +02:00
Aliaksandr Valialkin
b459919250 make vendor-update 2020-02-26 20:45:27 +02:00
Aliaksandr Valialkin
cc5fe0b315 vendor: update github.com/VictoriaMetrics/metrics from v1.10.1 to v1.11.0 2020-02-26 20:41:02 +02:00
Aliaksandr Valialkin
117c76311c app/vmagent/README.md: list service discovery mechanisms, which will be added soon
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/334
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/330
2020-02-26 19:27:08 +02:00
Aliaksandr Valialkin
b63e4464f4 lib/promscrape: properly reload new configs on SIGHUP
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/335
2020-02-26 13:54:00 +02:00
Edouard Hur
3ad36134f6 Readme markdown linting (#338)
* fixed MD009/no-trailing-spaces

* fixed MD033/no-inline-html: Inline HTML

* fixed MD012/no-multiple-blanks

* fixed MD007/ul-indent

* fixed MD004/ul-style

* fixed MD031/blanks-around-fences

* fixed MD040/fenced-code-language

* fixed MD032/blanks-around-lists

* fixed MD026/no-trailing-punctuation
2020-02-26 13:21:19 +02:00
Edouard Hur
1f0007d0b1 Readme envvars (#332)
* add details about env vars config

* add env var to table of contents

* remove unnecessary words
2020-02-25 22:41:34 +02:00
Aliaksandr Valialkin
6739c2749d lib/promscrape: go fmt 2020-02-25 20:56:44 +02:00
Aliaksandr Valialkin
7a33da8fea lib/promscrape: do not add missing port to __address__ label in order to be consistent with Prometheus behavior
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/331
2020-02-25 20:49:50 +02:00
Aliaksandr Valialkin
be37d762cd app/vmagent: add -remoteWrite.maxBlockSize command-line flag for limiting the maximum size of unpacked block to send to remote storage 2020-02-25 19:57:47 +02:00
Aliaksandr Valialkin
4e24839a2c app/vmagent: do not allow sending unpacked requests with sizes exceeding -maxInsertRequestSize 2020-02-25 19:34:41 +02:00
Aliaksandr Valialkin
6386aeb1e0 app/vmagent: add ability to accept Influx line protocol data via TCP and UDP
Just set `-influxListenAddr` command-line flag

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/333
2020-02-25 19:12:49 +02:00
Aliaksandr Valialkin
e453880084 app/vmagent/README.md: mention that vmagent exposes target statuses at /targets page 2020-02-25 18:15:58 +02:00
Aliaksandr Valialkin
4c4448b66e app/vminsert: add /targets handler, which exposes Prometheus targets defined in -promscrape.config file 2020-02-25 18:13:11 +02:00
Aliaksandr Valialkin
7ef7c9368e lib/fs: typo fix: read blocks bigger than 8KB via pread() call instead of using mmap 2020-02-25 18:05:06 +02:00
Aliaksandr Valialkin
e1ef72af01 app/vmagent: logo fix 2020-02-25 00:09:19 +02:00
Aliaksandr Valialkin
56c70fe856 app/vmagent: update docs 2020-02-25 00:09:18 +02:00
Aliaksandr Valialkin
e7e4aa5243 app/vmagent/README.md: small fixes 2020-02-24 21:25:38 +02:00
Aliaksandr Valialkin
fed2959658 lib/envflag: substitute dots with underscores in env var names if -envflag.enable is set
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/311
2020-02-24 21:14:44 +02:00
Aliaksandr Valialkin
ae51300973 app/vmselect/promql: properly take into account the first datapoint when calculating rollup_candlestick
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309
2020-02-24 13:24:30 +02:00
Aliaksandr Valialkin
e65ec88779 app/vmselect/promql: do not take into account values outside the current window in rollup_candlestick
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309
2020-02-23 18:03:57 +02:00
Yaroslav
a6d0645539 fix rollupOpen(), rollupHigh(), rollupLow() functions (#328) 2020-02-23 18:01:53 +02:00
Aliaksandr Valialkin
04762344c6 app/vmagent: initial implementation for vmagent 2020-02-23 13:36:03 +02:00
Aliaksandr Valialkin
4e905d6501 vendor: update github.com/valyala/fastjson from v1.4.5 to v1.5.0 2020-02-23 10:06:00 +02:00
kreedom
49390b8dbc [vmalert] integration with AlertManager (#325) 2020-02-21 23:15:05 +02:00
Aliaksandr Valialkin
2f55cabaa4 app/vmselect/promql: log when rollupResult cache is cleared 2020-02-21 20:07:01 +02:00
Aliaksandr Valialkin
d21cb43e48 lib/storage: add vm_ prefix to deduplicated_samples_total metric to be conistent with other metrics 2020-02-21 19:33:59 +02:00
Aliaksandr Valialkin
ec9bf39b5b app/vmselect: add -search.cacheTimestampOffset command-line flag
This flag can be used for removing gaps on graphs if the difference between the current time
and the timestamps from the ingested data exceeds 5 minutes.

This is the case when the time between data sources and VictoriaMetrics is improperly synchronized.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/312
2020-02-21 13:58:06 +02:00
Aliaksandr Valialkin
539139391c app/vmselect: add /internl/resetRollupResultCache handler for resetting response cache
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/312
2020-02-21 13:58:05 +02:00
Aliaksandr Valialkin
5431f9cd4e deployment/docker: update Go builder from v1.13.7 to v1.13.8 2020-02-20 19:46:20 +02:00
kreedom
3c06179184 basic vmalert backbone (#317)
* basic vmalert backbone

* Resolve code review comments for vmalert backbone

* Second review fixes for vmalert backbone
2020-02-16 20:59:02 +02:00
Aliaksandr Valialkin
71a52f5f90 lib/protoparser/prometheus: skip leading whitespace from tag names 2020-02-16 19:06:33 +02:00
Aliaksandr Valialkin
e7ba18b0d9 vendor: make vendor-udpate 2020-02-16 16:11:24 +02:00
Aliaksandr Valialkin
ce15cecae4 lib/storage: typo fix 2020-02-16 15:53:44 +02:00
Aliaksandr Valialkin
32e153e834 lib/storage: prevent from clobbering nin-nil lastError in Storage.add 2020-02-16 15:51:26 +02:00
Aliaksandr Valialkin
7b1c7051a3 app/vmselect: add sort_by_label(q, label) and sort_by_label_desc(q, label) functions
This is implementation of https://github.com/prometheus/prometheus/pull/1533 for VictoriaMetrics.
2020-02-13 17:01:37 +02:00
Aliaksandr Valialkin
7836ad8907 lib/mergeset: skip createing temporary part objects when merging source inmemory parts
This should reduce CPU usage when adding new entries to inverted index.
This should alos prevent from creating stalled cleaner goroutines for the created temporary parts,
since they were never closed.

This should fix the following issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/316 .
2020-02-13 14:09:32 +02:00
Aliaksandr Valialkin
eceaf13e5e lib/{storage,mergeset}: use time.Ticker instead of time.Timer where appropriate
It has been appeared that time.Timer was used in places where time.Ticker must be used instead.
This could result in blocked goroutines as in the https://github.com/VictoriaMetrics/VictoriaMetrics/issues/316 .
2020-02-13 13:10:07 +02:00
Aliaksandr Valialkin
8162d58dbd make vendor-update 2020-02-10 23:28:15 +02:00
Aliaksandr Valialkin
848d5da0be vendor: update github.com/VictoriaMetrics/metrics from v1.9.3 to v1.10.1 2020-02-10 23:08:38 +02:00
Aliaksandr Valialkin
4cc0163c7c docs: migrate ExtendedPromQL->MetricsQL in order to be more consistent 2020-02-10 23:02:43 +02:00
Aliaksandr Valialkin
a801a1a6e7 .github/ISSUE_TEMPLATE: ask for command-line flags and Prometheus logs 2020-02-10 22:56:17 +02:00
Aliaksandr Valialkin
02e852854a README.md: refer to the article about data deletion via relabeling 2020-02-10 22:46:52 +02:00
Aliaksandr Valialkin
9e6e2319b9 README.md: mention that flags may be read from env vars if -envflag.enable command-line flag is set 2020-02-10 16:20:15 +02:00
Aliaksandr Valialkin
025297f15d lib/envflag: check for incorrect flag values read from environment vars 2020-02-10 16:08:10 +02:00
Aliaksandr Valialkin
5d207b2025 lib/envflag: add -envflag.enable command-line flag for enabling reading flags from environment vars
By default flags are read only from command line. They can be read from environment vars if `-envflag.enable` is set.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/311
2020-02-10 16:02:37 +02:00
Aliaksandr Valialkin
8466ab0034 all: allow setting flags via environment vars
Now flags can be set via environment vars with the same names as flags.
Command-line flags override flags set via env vars.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/311
2020-02-10 13:29:13 +02:00
Aliaksandr Valialkin
e210cd9da1 lib/storage: move -dedup.minScrapeInterval flag outside lib/storage, so it doesnt show up in vminsert in cluster version 2020-02-10 13:09:51 +02:00
Aliaksandr Valialkin
6db573470c docs/Single-server-VictoriaMetrics.md: sync with README.md 2020-02-07 00:02:34 +02:00
Ryota Arai
fffe5d4ba4 Fix a typo in README (selfScrapeInterval) (#310) 2020-02-06 13:14:31 +02:00
Aliaksandr Valialkin
a6c6a2debc app/vmselect/promql: do not add step to range end, since this hack became obsolete since commit 9e1119dab8 2020-02-05 21:22:19 +02:00
Aliaksandr Valialkin
78b62dee87 app/vmselect/promql: properly adjust time range for data to select
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309
2020-02-05 21:22:18 +02:00
Aliaksandr Valialkin
366693b9f1 app/vmselect: unconditionally offset -step to rollup_candlestick. This makes results more consistent 2020-02-04 23:32:12 +02:00
Aliaksandr Valialkin
525101339e app/vmselect/promql: automatically apply offset -step to rollup_candlestick function in order to obtain the expected OHLC results
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309
2020-02-04 23:24:35 +02:00
Aliaksandr Valialkin
ada6a3da8d app/vmselect/promql: adjust rollup_candlestick calculations to the exepcted results
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309
2020-02-04 22:42:13 +02:00
Aliaksandr Valialkin
40c6ae2952 lib/logger: initialize output to os.Stderr by default 2020-02-04 22:40:44 +02:00
Aliaksandr Valialkin
cff0cb297c Do not require checking for errors returned from fmt.Fprint
This fixes `make errcheck` error found in lib/logger
2020-02-04 22:03:37 +02:00
Aliaksandr Valialkin
e0a4c37fc1 lib/logger: add -loggerOutput command-line flag
This flag allows changing log output from `stderr` to `stdout` if `-loggerOutput=stdout` is set.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/306
2020-02-04 21:47:56 +02:00
Aliaksandr Valialkin
7f3e3a6034 lib/logger: do not clutter -loggerFormat=json output with stack trace
This should improve json parsing
2020-02-04 21:37:25 +02:00
Aliaksandr Valialkin
bd4698bb7a lib/storage: do not deduplicate blocks with less than 32 samples during merge
This should improve deduplication accuracy for blocks with higher number of samples.
2020-02-04 18:41:54 +02:00
Aliaksandr Valialkin
36a1ac8360 app/vmselect: take into account the time the requests wait in the queue if -search.maxConcurrentRequests is exceeded
This will prevent from excess CPU usage for timed out queries.
2020-02-04 16:15:08 +02:00
Aliaksandr Valialkin
834051e5b2 app/vmselect: add a placeholder for /api/v1/metadata, which could be requested by Grafana
See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata

VictoriaMetrics doesn't collect any metadata for metrics, so just return empty response.
2020-02-04 15:53:47 +02:00
Aliaksandr Valialkin
42864bb52f all: do not clash flag description with back-quoted flag types
See https://golang.org/pkg/flag/#PrintDefaults for more details.
2020-02-04 15:46:52 +02:00
Roman Khavronenko
1e023c6a72 Single dashboard (#300)
* improve description for `Pending datapoints` panel

* bump VM version requirement
2020-02-03 02:09:53 +02:00
Artem Navoiev
a47f292295 [vmalert] add vmalert.png.2 2020-02-02 12:17:19 +02:00
Artem Navoiev
354232b62b [vmalert] add vmalert.png 2020-02-02 12:16:05 +02:00
Artem Navoiev
28778be0cc [vmalert] initial 2020-02-02 12:14:09 +02:00
Aliaksandr Valialkin
90cf356ea1 app/vmselect/promql: adjust and and unless binary operator handling to be consistent with Prometheus 2020-01-31 18:52:38 +02:00
Aliaksandr Valialkin
c0b69ed06e deployment/docker: update Go builder from v1.13.6 to v1.13.7 2020-01-31 18:06:58 +02:00
Aliaksandr Valialkin
011a79da85 lib/fs: remove unused readerAt interface 2020-01-31 15:12:43 +02:00
Aliaksandr Valialkin
c3d86eef96 all: add -dedup.minScrapeInterval command-line flag for data de-duplication
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/86
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/278
2020-01-31 01:16:57 +02:00
Aliaksandr Valialkin
2152f6f0cd lib/storage: re-use indexSearch inside Storage.prefetchMetricNames 2020-01-31 01:16:53 +02:00
Aliaksandr Valialkin
d70ba7eb37 lib/fs: optimize small reads for ReaderAt.MustReadAt by reading from memory-mapped space instead of reading from file descriptor
This should improve performance when reading many small blocks.
2020-01-30 15:09:05 +02:00
Aliaksandr Valialkin
ad8af629bb all: rename ReadAt* to MustReadAt* in order to dont clash with io.ReaderAt 2020-01-30 15:08:58 +02:00
Aliaksandr Valialkin
d68546aa4a lib/storage: pre-fetch metricNames for the found metricIDs in Search.Init
This should speed up Search.NextMetricBlock loop for big number of found time series.
2020-01-30 15:08:51 +02:00
Aliaksandr Valialkin
5bb9ccb6bf lib/mergeset: properly update lastAccesstime in indexBlockCache entries
This is a follow-up for 6665f10e7b
2020-01-29 21:20:47 +02:00
Aliaksandr Valialkin
a462355b2f app/vmselect/promql: add keep_next_value(q) for filling gaps with the next non-empty value 2020-01-29 00:48:04 +02:00
Aliaksandr Valialkin
bdbb463756 docs/Single-server-VictoriaMetrics.md: fix heading size for Third-party contributions section 2020-01-28 23:13:35 +02:00
Aliaksandr Valialkin
371e86194d app/vminsert: moved -maxInsertRequestSize command-line flag out of lib/prompb in order to prevent its inclusion in vmselect and vmstorage apps 2020-01-28 23:02:08 +02:00
Aliaksandr Valialkin
adbbc4fa1a app/vmselect/promql: return expected results from increase() over the beginning of time series, which start from big value
Examples for such counters: OS-level counters for network or cpu stats.
2020-01-28 16:30:11 +02:00
Aliaksandr Valialkin
75ad47a43c app/victoria-metrics: check for error arg passed to filepath.Walk callback 2020-01-27 20:56:45 +02:00
Aliaksandr Valialkin
6320a19a8c app/victoria-metrics: remove integration build tag from tests
This simplifies testing with `go test ./app/victoria-metrics` without
the need to remember to pass `-tags=integration` to Go commands.
2020-01-27 20:25:28 +02:00
Aliaksandr Valialkin
7b26db5527 docs/Single-server-VictoriaMetrics.md: update Retention section 2020-01-27 18:44:21 +02:00
Alexander Danilov
1a3626bbe1 Add description for retention and how it works (#297) 2020-01-27 18:38:22 +02:00
Aliaksandr Valialkin
8074c10590 README.md: mention https://github.com/AnchorFree/tsdb-remote-write 2020-01-27 18:35:48 +02:00
Aliaksandr Valialkin
2392a359e1 app/vmselect/promql: fix panic on a single zero vmrange bucket in prometheus_buckets() function
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/296
2020-01-27 18:04:55 +02:00
Aliaksandr Valialkin
6caa9bb51b lib/logger: fix improperly set skipframes for all the logging functions
The bug has been introduced in the previous commit f6baee6efe
2020-01-26 18:34:27 +02:00
Aliaksandr Valialkin
f6baee6efe lib/httpserver: log the caller of httpserver.Errorf
Previously log message contained `httpserver.Errorf`, not it contains the caller of `httpserver.Errorf`, which is more useful.
2020-01-25 20:17:59 +02:00
Aliaksandr Valialkin
9df5b2d1c3 app/victoria-metrics: add -selfScrapeInterval flag for self-scraping /metrics page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/30
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/180
2020-01-25 19:19:59 +02:00
Aliaksandr Valialkin
2a0a0ed14d lib/protoparser: add parser for Prometheus exposition text format
This parser will be used by vmagent
2020-01-24 20:11:02 +02:00
Aliaksandr Valialkin
6456c93dbb app/vminsert: move ingestion protocol parsers to lib/protoparser, so they could be re-used in the upcoming vmagent 2020-01-24 16:53:00 +02:00
Aliaksandr Valialkin
1efea246b7 docs/Articles.md: add a link to https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da 2020-01-22 19:08:35 +02:00
Aliaksandr Valialkin
680080887d all: consistently log durations in seconds with millisecond precision
This should improve logs readability
2020-01-22 18:28:27 +02:00
Aliaksandr Valialkin
3992984e10 vendor: make vendor-update 2020-01-22 18:08:39 +02:00
Aliaksandr Valialkin
9773022e50 app/vmselect: mention the original query and time range in error messages
This should simplify debugging invalid or heavy queries.
2020-01-22 17:36:36 +02:00
Aliaksandr Valialkin
f8954c7250 vendor: update github.com/klauspost/compress from v1.9.7 to v1.9.8
New version should have better gzip compression. See https://github.com/klauspost/compress#changelog
2020-01-22 16:50:15 +02:00
Aliaksandr Valialkin
0ef6f91410 docs: Mention Slack and Telegram channels for user questions 2020-01-22 16:50:14 +02:00
Aliaksandr Valialkin
efc7ad88ec app/vmselect: mention command-line flag, which could be used for adjusting query timeouts, in timeout errors 2020-01-22 15:50:48 +02:00
Aliaksandr Valialkin
ec9651e266 app/vmselect/prometheus: increase default value -maxExportDuration to 30 days, since 10 minutes beat users exporting bit amounts of data 2020-01-22 15:50:47 +02:00
Aliaksandr Valialkin
a8b2f82fc6 vendor: update github.com/VictoriaMetrics/fastcache from v1.5.5 to v1.5.7 2020-01-22 12:31:32 +02:00
Aliaksandr Valialkin
582dd01f42 app/vmselect/promql: add range_over_time(m[d]) function for calculating value range for m over d 2020-01-21 19:05:17 +02:00
Aliaksandr Valialkin
36973ee975 app/vmselect/promql: add label_match(q, label, regexp) and label_mismatch(q, label, regexp) functions for filtering out time series with labels matching the given regexp 2020-01-21 15:00:20 +02:00
Aliaksandr Valialkin
6665f10e7b lib/{mergeset,storage}: properly update lastAccessTime in index and data block cache entries 2020-01-20 14:59:47 +02:00
Aliaksandr Valialkin
04363d6511 README.md: mention that delete API shouldnt be used on a regular basis due to non-zero overhead 2020-01-20 13:28:36 +02:00
Aliaksandr Valialkin
c97ade4487 docs/FAQ.md: typo fix according to comment from https://www.reddit.com/message/messages/lezkmo 2020-01-18 18:05:13 +02:00
Aliaksandr Valialkin
970f0dfbf2 docs/CaseStudies.md: add links to COLOPL talk about VictoriaMetrics 2020-01-18 17:23:33 +02:00
Aliaksandr Valialkin
227cf53ef9 app/vminsert: increase default value for -insert.maxQueueDuration from 30s to 60s
This should help catching up with high ingestion rate after VictoriaMetrics restart.
2020-01-18 14:39:36 +02:00
Aliaksandr Valialkin
257e61195a lib/uint64set: add missing bucket32.b16his values 2020-01-18 14:26:04 +02:00
Aliaksandr Valialkin
4cc0c44b9e lib/uint64set: optimize Set.Union
This should improve performance for queries over big number of time series
2020-01-18 13:47:03 +02:00
Aliaksandr Valialkin
1b5f02e293 lib/uint64set: add benchmarks for Set.Union 2020-01-18 13:47:02 +02:00
Aliaksandr Valialkin
3748fb24b6 lib/storage: skip recovering timestamps order for lossless compression (PrecisionBits=64) 2020-01-18 00:09:33 +02:00
Aliaksandr Valialkin
c9472e4f3a all: use github.com/klauspost/compress/gzip instead of compress/gzip
`github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`.
This gives better gzip compression and decompression speeds.
2020-01-17 23:58:46 +02:00
Aliaksandr Valialkin
bc0f897fcb lib/uint64set: reduce memory allocations in Set.AppendTo 2020-01-17 22:33:09 +02:00
Aliaksandr Valialkin
f9289b804a lib/storage: reduce memory allocations when merging metricID sets 2020-01-17 22:10:44 +02:00
Aliaksandr Valialkin
0c8ad08578 lib/uint64set: typo fix in Set.Intersect 2020-01-17 18:10:58 +02:00
Aliaksandr Valialkin
cdcacaea6d app/vmselect/netstorage: make fmt 2020-01-17 17:47:21 +02:00
Aliaksandr Valialkin
7327adbc86 app/vmselect/netstorage: limit the maximum size for in-memory buffer for temporary blocks file
This should reduce memory usage on systems with more than 8GB RAM.
2020-01-17 16:28:21 +02:00
Aliaksandr Valialkin
9f027ec176 lib/uint64set: optimize Intersect, Subtract and Union functions
This should improve performance for queries over big number of time series.
2020-01-17 16:11:49 +02:00
Aliaksandr Valialkin
cd53f7d177 lib/uint64set: improve benchmark for Set.Intersect 2020-01-17 16:08:17 +02:00
Aliaksandr Valialkin
d0d258b314 app/vmselect: limit the default value for -search.maxConcurrentRequests, so it plays well on systems with more than 16 vCPUs
A single heavy request can saturate all the available CPUs, so let's limit the number of concurrent requests to lower value.
This will give more chances for executing insert path.
2020-01-17 15:43:54 +02:00
Aliaksandr Valialkin
d88725f133 app/{vminsert,vmselect}: improve error messages when VictoriaMetrics cannot handle too high number of concurrent inserts / selects 2020-01-17 13:24:37 +02:00
Aliaksandr Valialkin
8dbf430469 lib/uint64set: add benchmark for Set.Intersect 2020-01-17 00:31:07 +02:00
Aliaksandr Valialkin
9ef4d32a9a make vendor-update 2020-01-16 14:14:19 +02:00
Aliaksandr Valialkin
0d7505b00e all: mention command-line flags used for limiting the incoming request size in error messages
This should improve error logs usability.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/287
2020-01-16 13:03:30 +02:00
Aliaksandr Valialkin
2839f4688a app/vmselect/promql: fix panic on sum(aggr_over_time(...)) with incorrect number of args 2020-01-15 16:26:09 +02:00
Aliaksandr Valialkin
605d588ba6 lib/uint64set: reduce memory usage in Union, Intersect and Subtract methods
Iterate items with newly added Set.ForEach method instead of allocating `[]uint64`
slice for all the items before the iteration.
2020-01-15 12:12:49 +02:00
Aliaksandr Valialkin
7483deccca docs/FAQ.md: add bullet comparison with Cortex and Thanos 2020-01-15 10:47:40 +02:00
Aliaksandr Valialkin
893b62c682 lib/{mergeset,storage}: fix uint64 counters alignment for 32-bit architectures (GOARCH=386, GOARCH=arm) 2020-01-14 22:47:04 +02:00
Aliaksandr Valialkin
7830c10eb2 lib/{storage,mergeset}: gradually remove stale entries from block cache and index caches
This should reduce memory usage in the long run when old blocks and indexes
aren't accessed anymore.
2020-01-14 21:38:44 +02:00
Aliaksandr Valialkin
e4f1bfd221 deployment/docker: update Prometheus from v2.14.0 to v2.15.2 and Grafana from v6.5.0 to v6.5.2 2020-01-12 23:14:10 +02:00
Aliaksandr Valialkin
91ee1bce2e README.md: add a link to VictoriaMetrics subreddit - https://www.reddit.com/r/VictoriaMetrics/ 2020-01-12 00:06:20 +02:00
Aliaksandr Valialkin
8b14572f70 app/vmselect/promql: add hoeffding_bound_upper(phi, m[d]) and hoeffding_bound_lower(phi, m[d]) functions
These functions can be used for calculating Hoeffding bounds
for `m` over `d` time range and for the given `phi` in the range `[0..1]`.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/283
2020-01-11 14:46:23 +02:00
Aliaksandr Valialkin
8eaced8cae app/vmselect/promql: return continuous values for min_over_time and max_over_time when step is smaller than scrape_interval 2020-01-11 12:47:50 +02:00
Aliaksandr Valialkin
1585dab5a3 deployment/docker: switch Go builder from v1.13.5 to v1.13.6 2020-01-11 11:06:00 +02:00
Aliaksandr Valialkin
cd66d3fc43 README.md: mention about Prometheus->VictoriaMetrics exporter https://github.com/ryotarai/prometheus-tsdb-dump
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/93
2020-01-11 01:29:09 +02:00
Aliaksandr Valialkin
ea231f8167 app/victoria-metrics: adjust integration tests after the commit 99facd71cd6ac151d512cea1df73be91c10c7f83 2020-01-11 00:58:16 +02:00
Aliaksandr Valialkin
46bfdbe6cf app/vmselect/promql: do not take into account the previous point before time window in square brackets for min_over_time, max_over_time, rollup_first and rollup_last functions
This makes the behaviour for these functions similar to Prometheus when processing broken time series with irregular data points
like `gitlab_runner_jobs`. See https://gitlab.com/gitlab-org/gitlab-exporter/issues/50 for details.
2020-01-11 00:26:26 +02:00
Aliaksandr Valialkin
4f0a645f77 vendor: update github.com/valyala/fastjson from v1.4.2 to v1.4.5
This should fix parsing Inf values in `/api/v1/import`. The previous attempt to fix this in VictoriaMetrics v1.32.1 was unsuccessful.
2020-01-10 23:15:15 +02:00
Aliaksandr Valialkin
b829fe5e39 app/vmselect/promql: properly handle aggr(aggr_over_time(...)) 2020-01-10 21:57:18 +02:00
Aliaksandr Valialkin
164278151f app/vmselect/promql: add aggr_over_time(("aggr_func1", "aggr_func2", ...), m[d]) function
This function can be used for simultaneous calculating of multiple `aggr_func*` functions
that accept range vector. For example, `aggr_over_time(("min_over_time", "max_over_time"), m[d])`
would calculate `min_over_time` and `max_over_time` for `m[d]`.
2020-01-10 21:18:06 +02:00
Aliaksandr Valialkin
c4632faa9d app/vmselect/promql: add tmin_over_time(m[d]) and tmax_over_time(m[d]) functions
These functions return timestamp in seconds for the minimum and maximum value for `m` over time range `d`
2020-01-10 19:39:28 +02:00
Aliaksandr Valialkin
a768198814 docs: fix spelling typos 2020-01-09 23:42:55 +02:00
Roman Khavronenko
57f4875024 fix spellcheck issues (#285) 2020-01-09 23:41:52 +02:00
Aliaksandr Valialkin
b8038a14e7 lib/backup/s3remote: check whether the file exists before deleting it
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/284
2020-01-09 23:20:31 +02:00
Aliaksandr Valialkin
f358fb72d1 app/{vmbackup,vmrestore}: add backup complete file to backup when it is complete and check for this file before restoring from backup
This should prevent from restoring from incomplete backups.

Add `-skipBackupCompleteCheck` command-line flag to `vmrestore` in order to be able restoring from old backups without `backup complete` file.
2020-01-09 15:35:38 +02:00
Aliaksandr Valialkin
1c436b2723 vendor: update github.com/valyala/fastjson from v1.4.1 to v1.4.2
This fixes parsing of `inf` and `nan` values in json lines passed to `/api/v1/import`
2020-01-08 20:47:21 +02:00
Aliaksandr Valialkin
a973df6d79 README.md: remove height="200px" from logo image, since it is improperly displayed on smartphones 2020-01-08 20:29:11 +02:00
Aliaksandr Valialkin
d4132a6915 docs: typo fix 2020-01-08 14:45:27 +02:00
Aliaksandr Valialkin
d5aeda0e1a app/vmselect/promql: skip rate calculation for the first point on time series 2020-01-08 14:42:53 +02:00
Aliaksandr Valialkin
bb71b6d47d docs: add references to Remote Write Storage Wars
Also mention than VictoriaMetrics uses less RAM than Thanos Store Gateway - see https://github.com/thanos-io/thanos/issues/448 for details.
2020-01-04 23:57:35 +02:00
Aliaksandr Valialkin
fc71602039 lib/storage: limit maxRaRowsPerPartition by 500K for any number of rawRowsShardsPerPartition
This should reduce write amplification for high ingestion rate on multi-CPU systems
2020-01-04 23:57:31 +02:00
Aliaksandr Valialkin
c60fdbed30 docs/CaseStudies.md: add link to Remote Write Storage Wars talk from Adidas at PromCon 2019 2020-01-04 16:51:45 +02:00
Aliaksandr Valialkin
d410c78c7e app/vmselect/promql: fix calculations for histogram_share 2020-01-04 14:44:48 +02:00
Aliaksandr Valialkin
66f3d1dac8 README.md: update Alerting section 2020-01-04 13:55:09 +02:00
Aliaksandr Valialkin
d9c4ac9978 lib/metricsql: export IsRollupFunc and IsTransformFunc, since they can be used by package users 2020-01-04 13:25:05 +02:00
Aliaksandr Valialkin
4567a59fa0 LICENSE: update year 2020-01-04 13:21:04 +02:00
Aliaksandr Valialkin
d64699bb9f app/vmselect/promql: add missing MetricName into netstorage.Result in tests 2020-01-04 12:52:39 +02:00
Aliaksandr Valialkin
f409f2d050 app/vmselect/promql: add histogram_share(le, buckets) function 2020-01-04 12:45:55 +02:00
Aliaksandr Valialkin
b1ded7cf9a app/vmselect/promql: add absent_over_time(m[d]) func similar to the function in Prometheus 2.16
See https://github.com/prometheus/prometheus/issues/2882
2020-01-04 12:45:07 +02:00
Aliaksandr Valialkin
a8360d04c0 app/vmselect/promql: add histogram_over_time(m[d]) rollup function 2020-01-04 12:44:56 +02:00
Aliaksandr Valialkin
3e09d38f29 app/vmselect/promql: fix results caching for multi-arg rollup functions such as quantile_over_time
Previosly only a single arg was taken into account, so caching didn't work properly for multi-arg rollup funcs.
2020-01-03 20:49:08 +02:00
Aliaksandr Valialkin
a774120460 app/vmselect/promql: use scrapeInterval instead of window in denominator when calculating rate for the first point on the time series
This should provide better estimation for `rate` in the beginning of time series.
2020-01-03 19:01:50 +02:00
Aliaksandr Valialkin
695682232f lib/uint64set: reduce memory usage when storing big number of sparse metric_id values 2020-01-03 18:16:44 +02:00
Aliaksandr Valialkin
b5645ccbdf app/vmselect/promql: increase the estimated number of time series returned by aggr() by (something) from 100 to 1K, since 100 may result in OOM for high number of time series 2020-01-03 01:02:21 +02:00
Aliaksandr Valialkin
cb3a342882 app/vmselect/promql: add share_le_over_time and share_gt_over_time functions for SLI and SLO calculations 2020-01-03 00:41:16 +02:00
Aliaksandr Valialkin
0038365206 docs: refer to standalone MetricsQL package 2020-01-02 23:43:35 +02:00
Aliaksandr Valialkin
61c9d320ed vendor: update github.com/VictoriaMetrics/fastcache from v1.5.4 to v1.5.5 2019-12-29 18:17:49 +02:00
Aliaksandr Valialkin
a21d786d3c lib/metricsql: add example for ExpandWithExprs 2019-12-26 21:32:11 +02:00
640 changed files with 71842 additions and 12942 deletions

View File

@@ -26,5 +26,16 @@ $ ./victoria-metrics-prod --version
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
```
**Used command-line flags**
Command-line flags are listed as `flag{name="httpListenAddr", value=":443"} 1` lines at `/metrics` page.
See the following docs for details:
* [monitoring for single-node VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#monitoring)
* [montioring for VictoriaMetrics cluster](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#monitoring)
**Additional context**
Add any other context about the problem here such as error logs, `/metrics` output, screenshots from [the official Grafana dashboard for VictoriaMetrics](https://grafana.com/dashboards/10229).
Add any other context about the problem here such as error logs from VictoriaMetrics and Prometheus,
`/metrics` output, screenshots from the official Grafana dashboards for VictoriaMetrics:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)

1
.gitignore vendored
View File

@@ -8,6 +8,7 @@
*.swp
/gocache-for-docker
/victoria-metrics-data
/vmagent-remotewrite-data
/vmstorage-data
/vmselect-cache
/package/temp-deb-*

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019 VictoriaMetrics, Inc.
Copyright 2019-2020 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -11,7 +11,10 @@ endif
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'
all: \
victoria-metrics-prod
victoria-metrics-prod \
vmagent-prod \
vmbackup-prod \
vmrestore-prod
include app/*/Makefile
include deployment/*/Makefile
@@ -21,15 +24,18 @@ clean:
publish: \
publish-victoria-metrics \
publish-vmagent \
publish-vmbackup \
publish-vmrestore
package: \
package-victoria-metrics \
package-vmagent \
package-vmbackup \
package-vmrestore
vmutils: \
vmagent \
vmbackup \
vmrestore
@@ -42,9 +48,10 @@ release-victoria-metrics: victoria-metrics-prod
sha256sum victoria-metrics-$(PKG_TAG).tar.gz > victoria-metrics-$(PKG_TAG)_checksums.txt
release-vmutils: \
vmagent-prod \
vmbackup-prod \
vmrestore-prod
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmbackup-prod vmrestore-prod && \
cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmbackup-prod vmrestore-prod && \
sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
pprof-cpu:
@@ -70,8 +77,10 @@ errcheck: install-errcheck
errcheck -exclude=errcheck_excludes.txt ./app/vminsert/...
errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
install-errcheck:
which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck
@@ -79,16 +88,16 @@ install-errcheck:
check-all: fmt vet lint errcheck golangci-lint
test:
GO111MODULE=on go test -tags=integration -mod=vendor ./lib/... ./app/...
GO111MODULE=on go test -mod=vendor ./lib/... ./app/...
test-pure:
GO111MODULE=on CGO_ENABLED=0 go test -tags=integration -mod=vendor ./lib/... ./app/...
GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor ./lib/... ./app/...
test-full:
GO111MODULE=on go test -tags=integration -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GO111MODULE=on go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GO111MODULE=on GOARCH=386 go test -tags=integration -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GO111MODULE=on GOARCH=386 go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
benchmark:
GO111MODULE=on go test -mod=vendor -bench=. ./lib/...

424
README.md
View File

@@ -6,7 +6,7 @@
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/workflows/main/badge.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg)](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
<img alt="Victoria Metrics" src="logo.png" height="200px">
![Victoria Metrics logo](logo.png "Victoria Metrics")
## VictoriaMetrics
@@ -17,18 +17,18 @@ in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just downl
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
## Case studies and talks
## Case studies
* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
## Prominent features
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) query language, which is inspired by PromQL.
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
@@ -38,9 +38,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
may be crammed into limited storage comparing to TimescaleDB.
* Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
and [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* Easy operation:
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
* All the configuration is done via explicit command-line flags with reasonable defaults.
@@ -49,9 +51,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
to S3 or GCS with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md) / [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md).
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
* Supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
* [Metrics from Prometheus exporters](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
such as [node_exporter](https://github.com/prometheus/node_exporter). See [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for details.
* [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) over HTTP, TCP and UDP.
* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
if `-graphiteListenAddr` is set.
* [OpenTSDB put message](#sending-data-via-telnet-put-protocol) if `-opentsdbListenAddr` is set.
@@ -60,60 +64,61 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
## Operation
### Table of contents
- [How to start VictoriaMetrics](#how-to-start-victoriametrics)
- [Prometheus setup](#prometheus-setup)
- [Grafana setup](#grafana-setup)
- [How to upgrade VictoriaMetrics?](#how-to-upgrade-victoriametrics)
- [How to apply new config to VictoriaMetrics?](#how-to-apply-new-config-to-victoriametrics)
- [How to send data from InfluxDB-compatible agents such as Telegraf?](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
- [How to send data from Graphite-compatible agents such as StatsD?](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
- [Querying Graphite data](#querying-graphite-data)
- [How to send data from OpenTSDB-compatible agents?](#how-to-send-data-from-opentsdb-compatible-agents)
- [Prometheus querying API usage](#prometheus-querying-api-usage)
- [How to build from sources](#how-to-build-from-sources)
- [Development build](#development-build)
- [Production build](#production-build)
- [ARM build](#arm-build)
- [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0)
- [Building docker images](#building-docker-images)
- [Start with docker-compose](#start-with-docker-compose)
- [Setting up service](#setting-up-service)
- [Third-party contributions](#third-party-contributions)
- [How to work with snapshots?](#how-to-work-with-snapshots)
- [How to delete time series?](#how-to-delete-time-series)
- [How to export time series?](#how-to-export-time-series)
- [How to import time series data?](#how-to-import-time-series-data)
- [Federation](#federation)
- [Capacity planning](#capacity-planning)
- [High availability](#high-availability)
- [Multiple retentions](#multiple-retentions)
- [Downsampling](#downsampling)
- [Multi-tenancy](#multi-tenancy)
- [Scalability and cluster version](#scalability-and-cluster-version)
- [Alerting](#alerting)
- [Security](#security)
- [Tuning](#tuning)
- [Monitoring](#monitoring)
- [Troubleshooting](#troubleshooting)
- [Backfilling](#backfilling)
- [Profiling](#profiling)
- [Integrations](#integrations)
- [Roadmap](#roadmap)
- [Contacts](#contacts)
- [Community and contributions](#community-and-contributions)
- [Reporting bugs](#reporting-bugs)
- [Victoria Metrics Logo](#victoria-metrics-logo)
- [Logo Usage Guidelines](#logo-usage-guidelines)
- [Font used:](#font-used)
- [Color Palette:](#color-palette)
- [We kindly ask:](#we-kindly-ask)
* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
* [Environment variables](#environment-variables)
* [Prometheus setup](#prometheus-setup)
* [Grafana setup](#grafana-setup)
* [How to upgrade VictoriaMetrics](#how-to-upgrade-victoriametrics)
* [How to apply new config to VictoriaMetrics](#how-to-apply-new-config-to-victoriametrics)
* [How to scrape Prometheus exporters such as node_exporter](#how-to-scrape-prometheus-exporters-such-as-node-exporter)
* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
* [Querying Graphite data](#querying-graphite-data)
* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
* [Prometheus querying API usage](#prometheus-querying-api-usage)
* [How to build from sources](#how-to-build-from-sources)
* [Development build](#development-build)
* [Production build](#production-build)
* [ARM build](#arm-build)
* [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0)
* [Building docker images](#building-docker-images)
* [Start with docker-compose](#start-with-docker-compose)
* [Setting up service](#setting-up-service)
* [How to work with snapshots](#how-to-work-with-snapshots)
* [How to delete time series](#how-to-delete-time-series)
* [How to export time series](#how-to-export-time-series)
* [How to import time series data](#how-to-import-time-series-data)
* [Federation](#federation)
* [Capacity planning](#capacity-planning)
* [High availability](#high-availability)
* [Deduplication](#deduplication)
* [Retention](#retention)
* [Multiple retentions](#multiple-retentions)
* [Downsampling](#downsampling)
* [Multi-tenancy](#multi-tenancy)
* [Scalability and cluster version](#scalability-and-cluster-version)
* [Alerting](#alerting)
* [Security](#security)
* [Tuning](#tuning)
* [Monitoring](#monitoring)
* [Troubleshooting](#troubleshooting)
* [Backfilling](#backfilling)
* [Profiling](#profiling)
* [Integrations](#integrations)
* [Roadmap](#roadmap)
* [Contacts](#contacts)
* [Community and contributions](#community-and-contributions)
* [Third-party contributions](#third-party-contributions)
* [Reporting bugs](#reporting-bugs)
* [Victoria Metrics Logo](#victoria-metrics-logo)
* [Logo Usage Guidelines](#logo-usage-guidelines)
* [Font used](#font-used)
* [Color Palette](#color-palette)
* [We kindly ask](#we-kindly-ask)
### How to start VictoriaMetrics
@@ -125,19 +130,23 @@ The following command-line flags are used the most:
* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in current working directory.
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default, it is disabled.
* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data over telnet protocol. By default, it is disabled.
* `-opentsdbHTTPListenAddr` - TCP address to listen to for HTTP OpenTSDB data over `/api/put`. By default, it is disabled.
Pass `-help` to see all the available flags with description and default values.
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
#### Environment variables
Each flag values can be set thru environment variables by following these rules:
* The `-envflag.enable` flag must be set
* Each `.` in flag names must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
* For repeating flags, an alternative syntax can be used by joining the different values into one using `,` as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
### Prometheus setup
Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
in order to send data to VictoriaMetrics. Add the following lines
Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
in order to send data to VictoriaMetrics. Add the following lines
to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
```yml
@@ -148,7 +157,7 @@ remote_write:
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then apply the new config via the following command:
```
```bash
kill -HUP `pidof prometheus`
```
@@ -171,7 +180,8 @@ across Prometheus instances, so those time series may be filtered and grouped by
For highly loaded Prometheus instances (400k+ samples per second)
the following tuning may be applied:
```
```yaml
remote_write:
- url: http://<victoriametrics-addr>:8428/api/v1/write
queue_config:
@@ -182,29 +192,27 @@ remote_write:
Using remote write increases memory usage for Prometheus up to ~25%
and depends on the shape of data. If you are experiencing issues with
too high memory consumption try to lower `max_samples_per_send`
too high memory consumption try to lower `max_samples_per_send`
and `capacity` params (keep in mind that these two params are tightly connected).
Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
since the previous versions may have issues with `remote_write`.
### Grafana setup
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
```
```url
http://<victoriametrics-addr>:8428
```
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/).
VictoriaMetrics supports native PromQL and [extends it with useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
VictoriaMetrics supports native PromQL and [extends it with useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL).
### How to upgrade VictoriaMetrics?
### How to upgrade VictoriaMetrics
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
say otherwise. It is recommended performing regular upgrades to the latest version,
@@ -219,8 +227,7 @@ Follow the following steps during the upgrade:
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to apply new config to VictoriaMetrics?
### How to apply new config to VictoriaMetrics
VictoriaMetrics must be restarted for applying new config:
@@ -231,20 +238,35 @@ VictoriaMetrics must be restarted for applying new config:
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to scrape Prometheus exporters such as [node-exporter](https://github.com/prometheus/node_exporter)
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?
VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping targets configured in `prometheus.yml` config file according to [the specification](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file).
Just set `-promscrape.config` command-line flag to the path to `prometheus.yml` config - and VictoriaMetrics should start scraping the configured targets.
Currently the following [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) types are supported:
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
In the future other `*_sd_config` types will be supported.
See also [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md), which can be used as drop-in replacement for Prometheus.
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)
Just use `http://<victoriametric-addr>:8428` url instead of InfluxDB url in agents' configs.
For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
```
```toml
[[outputs.influxdb]]
urls = ["http://<victoriametrics-addr>:8428"]
```
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
VictoriaMetrics maps Influx data using the following rules:
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
unless `db` tag exists in the Influx line.
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
@@ -255,13 +277,13 @@ VictoriaMetrics maps Influx data using the following rules:
For example, the following Influx line:
```
```raw
foo,tag1=value1,tag2=value2 field1=12,field2=40
```
is converted into the following Prometheus data points:
```
```raw
foo_field1{tag1="value1", tag2="value2"} 12
foo_field2{tag1="value1", tag2="value2"} 40
```
@@ -269,20 +291,20 @@ foo_field2{tag1="value1", tag2="value2"} 40
Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
to local VictoriaMetrics using `curl`:
```
```bash
curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'
```
An arbitrary number of lines delimited by '\n' may be sent in a single request.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__=~"measurement_.*"}'
```
The `/api/v1/export` endpoint should return the following response:
```
```jsonl
{"metric":{"__name__":"measurement_field1","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560272508147]}
{"metric":{"__name__":"measurement_field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]}
```
@@ -290,23 +312,21 @@ The `/api/v1/export` endpoint should return the following response:
Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
while VictoriaMetrics stores them with *milliseconds* precision.
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)
1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`:
```
```bash
/path/to/victoria-metrics-prod -graphiteListenAddr=:2003
```
2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost`
to the VictoriaMetrics host in `StatsD` configs.
Example for writing data with Graphite plaintext protocol to local VictoriaMetrics using `nc`:
```
```bash
echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003
```
@@ -314,26 +334,23 @@ VictoriaMetrics sets the current time if the timestamp is omitted.
An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277406000]}
```
### Querying Graphite data
Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read either via
[Prometheus querying API](#prometheus-querying-api-usage)
or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
### How to send data from OpenTSDB-compatible agents?
### How to send data from OpenTSDB-compatible agents
VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data.
@@ -343,39 +360,37 @@ and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.ht
1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
the following command enables OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
```
```bash
/path/to/victoria-metrics-prod -opentsdbListenAddr=:4242
```
2) Send data to the given address from OpenTSDB-compatible agents.
Example for writing data with OpenTSDB protocol to local VictoriaMetrics using `nc`:
```
```bash
echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242
```
An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277292000]}
```
#### Sending OpenTSDB data via HTTP `/api/put` requests
1) Enable HTTP server for OpenTSDB `/api/put` requests by setting `-opentsdbHTTPListenAddr` command line flag. For instance,
the following command enables OpenTSDB HTTP server on port `4242`:
```
```bash
/path/to/victoria-metrics-prod -opentsdbHTTPListenAddr=:4242
```
@@ -383,31 +398,30 @@ the following command enables OpenTSDB HTTP server on port `4242`:
Example for writing a single data point:
```
```bash
curl -H 'Content-Type: application/json' -d '{"metric":"x.y.z","value":45.34,"tags":{"t1":"v1","t2":"v2"}}' http://localhost:4242/api/put
```
Example for writing multiple data points in a single request:
```
```bash
curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put
```
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match[]=x.y.z' -d 'match[]=foo' -d 'match[]=bar'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo"},"values":[45.34],"timestamps":[1566464846000]}
{"metric":{"__name__":"bar"},"values":[43],"timestamps":[1566464846000]}
{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]}
```
### Prometheus querying API usage
VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
@@ -432,7 +446,6 @@ Additionally VictoriaMetrics provides the following handlers:
so it can be slow if the database contains tens of millions of time series.
* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
### How to build from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -440,7 +453,6 @@ We recommend using either [binary releases](https://github.com/VictoriaMetrics/V
from sources. Building from sources is reasonable when developing additional features specific
to your needs.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
@@ -485,32 +497,24 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
### Start with docker-compose
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
helps to spin up VictoriaMetrics, Prometheus and Grafana with one command.
More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#folder-contains-basic-images-and-tools-for-building-and-running-victoria-metrics-in-docker).
### Setting up service
Read [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) on how to set up VictoriaMetrics as a service in your OS.
### Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
### How to work with snapshots?
### How to work with snapshots
VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
for all the data stored under `-storageDataPath` directory.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/create` in order to create an instant snapshot.
The page will return the following JSON response:
```
```json
{"status":"ok","snapshot":"<snapshot-name>"}
```
@@ -526,13 +530,13 @@ to delete `<snapshot-name>` snapshot.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete_all` in order to delete all the snapshots.
Steps for restoring from a snapshot:
1. Stop VictoriaMetrics with `kill -INT`.
2. Restore snapshot contents from backup with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md)
to the directory pointed by `-storageDataPath`.
3. Start VictoriaMetrics.
### How to delete time series?
### How to delete time series
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
@@ -542,8 +546,21 @@ the deleted time series isn't freed instantly - it is freed during subsequent me
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics.
The delete API is intended mainly for the following cases:
### How to export time series?
* One-off deleting of accidentally written invalid (or undesired) time series.
* One-off deleting of user data due to [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur.
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
### How to export time series
Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
@@ -551,7 +568,7 @@ for metrics to export. Use `{__name__!=""}` selector for fetching all the time s
The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
Each JSON line would contain data for a single time series. An example output:
```
```jsonl
{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
```
@@ -562,7 +579,7 @@ unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) val
Pass `Accept-Encoding: gzip` HTTP header in the request to `/api/v1/export` in order to reduce network bandwidth during exporing big amounts
of time series data. This enables gzip compression for the exported data. Example for exporting gzipped data:
```
```bash
curl -H 'Accept-Encoding: gzip' http://localhost:8428/api/v1/export -d 'match[]={__name__!=""}' > data.jsonl.gz
```
@@ -570,8 +587,7 @@ The maximum duration for each request to `/api/v1/export` is limited by `-search
Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-time-series-data).
### How to import time series data?
### How to import time series data
Time series data can be imported via any supported ingestion protocol:
@@ -584,7 +600,7 @@ Time series data can be imported via any supported ingestion protocol:
The most efficient protocol for importing data into VictoriaMetrics is `/api/v1/import`. Example for importing data obtained via `/api/v1/export`:
```
```bash
# Export the data from <source-victoriametrics>:
curl http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl
@@ -594,7 +610,7 @@ curl -X POST http://destination-victoriametrics:8428/api/v1/import -T exported_d
Pass `Content-Encoding: gzip` HTTP request header to `/api/v1/import` for importing gzipped data:
```
```bash
# Export gzipped data from <source-victoriametrics>:
curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl.gz
@@ -605,7 +621,6 @@ curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428
Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
and importing them concurrently. Note that the original file must be split on newlines.
### Federation
VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
@@ -617,7 +632,6 @@ on the interval `[now - max_lookback ... now]` is scraped for each time series.
For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
with scrape intervals exceeding `5m`.
### Capacity planning
A rough estimation of the required resources for ingestion path:
@@ -625,7 +639,7 @@ A rough estimation of the required resources for ingestion path:
* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
Time series is considered active if new data points have been added to it recently or if it has been recently queried.
The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric
exproted on the `/metrics` page.
exported on the `/metrics` page.
VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag.
* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
@@ -645,7 +659,6 @@ A rough estimation of the required resources for ingestion path:
The actual ingress bandwidth usage depends on the average number of labels per ingested metric and the average size
of label values. The higher number of per-metric labels and longer label values mean the higher ingress bandwidth.
The required resources for query path:
* RAM size: depends on the number of time series to scan in each query and the `step`
@@ -657,7 +670,6 @@ The required resources for query path:
* Network usage: depends on the frequency and the type of incoming requests. Typical Grafana dashboards usually
require negligible network bandwidth.
### High availability
1) Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
@@ -676,7 +688,7 @@ remote_write:
3) Apply the updated config:
```
```bash
kill -HUP `pidof prometheus`
```
@@ -684,10 +696,29 @@ kill -HUP `pidof prometheus`
5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
6) Set up Prometheus datasource in Grafana that points to Promxy.
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
with the enabled de-duplication. See [this section](#deduplication) for details.
### Deduplication
VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would de-duplicate data points
on the same time series if they are located closer than 60s to each other.
The de-duplication reduces disk space usage if multiple identically configured Prometheus instances in HA pair
write data to the same VictoriaMetrics instance. Note that these Prometheus instances must have identical
`external_labels` section in their configs, so they write data to the same time series.
### Retention
Retention is configured with `-retentionPeriod` command-line flag. For instance, `-retentionPeriod=3` means
that the data will be stored for 3 months and then deleted.
Data is split in per-month subdirectories inside `<-storageDataPath>/data/small` and `<-storageDataPath>/data/big` folders.
Directories for months outside the configured retention are deleted on the first day of new month.
In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
### Multiple retentions
@@ -697,24 +728,22 @@ Just start multiple VictoriaMetrics instances with distinct values for the follo
* `-storageDataPath`, so the data for each retention period is saved in a separate directory
* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention
### Downsampling
There is no downsampling support at the moment, but:
- VictoriaMetrics is optimized for querying big amounts of raw data. See benchmark results for heavy queries
* VictoriaMetrics is optimized for querying big amounts of raw data. See benchmark results for heavy queries
in [this article](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
- VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
* VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
for details.
These properties reduce the need in downsampling. We plan to implement downsampling in the future.
These properties reduce the need of downsampling. We plan to implement downsampling in the future.
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.
### Multi-tenancy
Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead.
### Scalability and cluster version
Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU.
@@ -725,12 +754,13 @@ So try single-node VictoriaMetrics at first and then [switch to cluster version]
horizontally scalable long-term remote storage for really large Prometheus deployments.
[Contact us](mailto:info@victoriametrics.com) for paid support.
### Alerting
VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions must be performed either
on [Prometheus side](https://prometheus.io/docs/alerting/overview/) or on [Grafana side](https://grafana.com/docs/alerting/rules/).
VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).
### Security
@@ -746,28 +776,29 @@ Consider setting the following command-line flags:
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
### Tuning
* There is no need in VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
* There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
which are automatically adjusted for the available CPU and RAM resources.
* There is no need in Operating System tuning since VictoriaMetrics is optimized for default OS settings.
* There is no need for Operating System tuning since VictoriaMetrics is optimized for default OS settings.
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
so Prometheus instances could establish more connections to VictoriaMetrics.
* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
If you plan storing more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
If you plan to store more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
then the following options are recommended to pass to `mkfs.ext4`:
```
```bash
mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
```
### Monitoring
VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
There are officials Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176).
The most interesting metrics are:
@@ -783,11 +814,10 @@ The most interesting metrics are:
where `system_memory` is the amount of system memory and `-memory.allowedPercent` is the corresponding flag value.
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
### Troubleshooting
* It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
in tweaking these flag values arises.
of tweaking these flag values arises.
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
then it is likely you have too many active time series for the current amount of RAM.
@@ -801,31 +831,42 @@ The most interesting metrics are:
has at least 20% of free space comparing to disk size.
* If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
then just remove directoreis with broken parts. This will recover VictoriaMetrics at the cost
then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
for automatic recovering from such errors.
* If you see gaps on the graphs, try resetting the cache by sending request to `/internal/resetRollupResultCache`.
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
### Backfilling
VictoriaMetrics accepts historical data in arbitrary order of time.
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
historical data with timestamps from the past, since the cache assumes that the data is written with
the current timestamps. Query cache can be enabled after the backfilling is complete.
An alternative solution is to query `/internal/resetRollupResultCache` url after backfilling is complete. This will reset
the query cache, which could contain incomplete data cached during the backfilling.
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
for data with timestamps close to the current time.
### Profiling
VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
- Memory profile. It can be collected with the following command:
```
* Memory profile. It can be collected with the following command:
```bash
curl -s http://<victoria-metrics-host>:8428/debug/pprof/heap > mem.pprof
```
- CPU profile. It can be collected with the following command:
```
* CPU profile. It can be collected with the following command:
```bash
curl -s http://<victoria-metrics-host>:8428/debug/pprof/profile > cpu.pprof
```
@@ -833,7 +874,6 @@ The command for collecting CPU profile waits for 30 seconds before returning.
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
## Integrations
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
@@ -842,60 +882,61 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
See [this example](/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
## Roadmap
- [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
- [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
- [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
- [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
- [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment any item or add own one.
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
## Contacts
Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics.com](mailto:info@victoriametrics.com).
## Community and contributions
Feel free asking any questions regarding VictoriaMetrics:
- [slack](http://slack.victoriametrics.com/)
- [telegram-en](https://t.me/VictoriaMetrics_en)
- [telegram-ru](https://t.me/VictoriaMetrics_ru1)
- [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
* [slack](http://slack.victoriametrics.com/)
* [reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [telegram-en](https://t.me/VictoriaMetrics_en)
* [telegram-ru](https://t.me/VictoriaMetrics_ru1)
* [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
If you like VictoriaMetrics and want to contribute, then we need the following:
- Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
- Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
- Updating documentation.
* Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
* Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
* Updating documentation.
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
- Prefer simple code and architecture.
- Avoid complex abstractions.
- Avoid magic code and fancy algorithms.
- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
- Minimize the number of moving parts in the distributed system.
- Avoid automated decisions, which may hurt cluster availability, consistency or performance.
* Prefer simple code and architecture.
* Avoid complex abstractions.
* Avoid magic code and fancy algorithms.
* Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
* Minimize the number of moving parts in the distributed system.
* Avoid automated decisions, which may hurt cluster availability, consistency or performance.
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
## Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Reporting bugs
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
## Victoria Metrics Logo
[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version).
[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
Files included in each folder:
@@ -903,22 +944,21 @@ Files included in each folder:
* 2 PNG Preview files with transparent background
* 2 EPS Adobe Illustrator EPS10 files
### Logo Usage Guidelines
#### Font used:
#### Font used
* Lato Black
* Lato Regular
#### Color Palette:
#### Color Palette
* HEX [#110f0f](https://www.color-hex.com/color/110f0f)
* HEX [#ffffff](https://www.color-hex.com/color/ffffff)
### We kindly ask:
### We kindly ask
- Please don't use any other font instead of suggested.
- There should be sufficient clear space around the logo.
- Do not change spacing, alignment, or relative locations of the design elements.
- Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.
* Please don't use any other font instead of suggested.
* There should be sufficient clear space around the logo.
* Do not change spacing, alignment, or relative locations of the design elements.
* Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.

View File

@@ -9,44 +9,55 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
var httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
var (
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Remove superflouos samples from time series if they are located closer to each other than this duration. "+
"This may be useful for reducing overhead when multiple identically configured Prometheus instances write data to the same VictoriaMetrics. "+
"Deduplication is disabled if the -dedup.minScrapeInterval is 0")
)
func main() {
flag.Parse()
envflag.Parse()
buildinfo.Init()
logger.Init()
logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
startTime := time.Now()
storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval)
vmstorage.Init()
vmselect.Init()
vminsert.Init()
startSelfScraper()
go httpserver.Serve(*httpListenAddr, requestHandler)
logger.Infof("started VictoriaMetrics in %s", time.Since(startTime))
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
sig := procutil.WaitForSigterm()
logger.Infof("received signal %s", sig)
stopSelfScraper()
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
startTime = time.Now()
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
}
vminsert.Stop()
logger.Infof("successfully shut down the webservice in %s", time.Since(startTime))
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
vmstorage.Stop()
vmselect.Stop()
fs.MustStopDirRemover()
logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
logger.Infof("the VictoriaMetrics has been stopped in %.3f seconds", time.Since(startTime).Seconds())
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {

View File

@@ -1,5 +1,3 @@
// +build integration
package main
import (
@@ -23,6 +21,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -148,7 +147,7 @@ func setUp() {
}
func processFlags() {
flag.Parse()
envflag.Parse()
for _, fv := range []struct {
flag string
value string
@@ -302,6 +301,9 @@ func readIn(readFor string, t *testing.T, insertTime time.Time) []test {
s := newSuite(t)
var tt []test
s.noError(filepath.Walk(filepath.Join(testFixturesDir, readFor), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if filepath.Ext(path) != ".json" {
return nil
}

View File

@@ -0,0 +1,99 @@
package main
import (
"flag"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
var selfScrapeInterval = flag.Duration("selfScrapeInterval", 0, "Interval for self-scraping own metrics at /metrics page")
var selfScraperStopCh chan struct{}
var selfScraperWG sync.WaitGroup
func startSelfScraper() {
selfScraperStopCh = make(chan struct{})
selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval)
}()
}
func stopSelfScraper() {
close(selfScraperStopCh)
selfScraperWG.Wait()
}
func selfScraper(scrapeInterval time.Duration) {
if scrapeInterval <= 0 {
// Self-scrape is disabled.
return
}
logger.Infof("started self-scraping `/metrics` page with interval %.3f seconds", scrapeInterval.Seconds())
var bb bytesutil.ByteBuffer
var rows prometheus.Rows
var mrs []storage.MetricRow
var labels []prompb.Label
t := time.NewTicker(scrapeInterval)
var currentTimestamp int64
for {
select {
case <-selfScraperStopCh:
t.Stop()
logger.Infof("stopped self-scraping `/metrics` page")
return
case currentTime := <-t.C:
currentTimestamp = currentTime.UnixNano() / 1e6
}
bb.Reset()
httpserver.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B)
rows.Reset()
rows.Unmarshal(s)
mrs = mrs[:0]
for i := range rows.Rows {
r := &rows.Rows[i]
labels = labels[:0]
labels = addLabel(labels, "", r.Metric)
labels = addLabel(labels, "job", "victoria-metrics")
labels = addLabel(labels, "instance", "self")
for j := range r.Tags {
t := &r.Tags[j]
labels = addLabel(labels, t.Key, t.Value)
}
if len(mrs) < cap(mrs) {
mrs = mrs[:len(mrs)+1]
} else {
mrs = append(mrs, storage.MetricRow{})
}
mr := &mrs[len(mrs)-1]
mr.MetricNameRaw = storage.MarshalMetricNameRaw(mr.MetricNameRaw[:0], labels)
mr.Timestamp = currentTimestamp
mr.Value = r.Value
}
logger.Infof("writing %d rows at timestamp %d", len(mrs), currentTimestamp)
vmstorage.AddRows(mrs)
}
}
func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
if len(dst) < cap(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, prompb.Label{})
}
lb := &dst[len(dst)-1]
lb.Name = bytesutil.ToUnsafeBytes(key)
lb.Value = bytesutil.ToUnsafeBytes(value)
return dst
}

View File

@@ -1,18 +1,18 @@
// +build integration
// Source https://github.com/prometheus/prometheus/blob/master/prompb/remote.pb.go . Code is copy pasted and cleaned up
package test
// Source https://github.com/prometheus/prometheus/blob/master/prompb/remote.pb.go . Code is copy pasted and cleaned up
import (
"encoding/binary"
"math"
"math/bits"
)
// WriteRequest is write request
type WriteRequest struct {
Timeseries []TimeSeries `protobuf:"bytes,1,rep,name=timeseries,proto3" json:"timeseries"`
}
// Size returns m size in bytes after marshaling.
func (m *WriteRequest) Size() (n int) {
if m == nil {
return 0
@@ -31,6 +31,7 @@ func sovRemote(x uint64) (n int) {
return (bits.Len64(x|1) + 6) / 7
}
// Marshal marshals m.
func (m *WriteRequest) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@@ -41,11 +42,13 @@ func (m *WriteRequest) Marshal() (dAtA []byte, err error) {
return dAtA[:n], nil
}
// MarshalTo marshals m to dAtA
func (m *WriteRequest) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
// MarshalToSizedBuffer marshals m to dAtA.
func (m *WriteRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
if len(m.Timeseries) > 0 {
@@ -77,11 +80,13 @@ func encodeVarintRemote(dAtA []byte, offset int, v uint64) int {
return base
}
// Sample is time series sample.
type Sample struct {
Value float64 `protobuf:"fixed64,1,opt,name=value,proto3" json:"value,omitempty"`
Timestamp int64 `protobuf:"varint,2,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
}
// Reset resets m.
func (m *Sample) Reset() { *m = Sample{} }
// TimeSeries represents samples and labels for a single time series.
@@ -90,21 +95,27 @@ type TimeSeries struct {
Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"`
}
// Reset resets m.
func (m *TimeSeries) Reset() { *m = TimeSeries{} }
// Label is time series label.
type Label struct {
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
}
// Reset resets m.
func (m *Label) Reset() { *m = Label{} }
// Labels is a set of labels.
type Labels struct {
Labels []Label `protobuf:"bytes,1,rep,name=labels,proto3" json:"labels"`
}
// Reset resets m.
func (m *Labels) Reset() { *m = Labels{} }
// Marshal marshals m.
func (m *Sample) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@@ -115,11 +126,13 @@ func (m *Sample) Marshal() (dAtA []byte, err error) {
return dAtA[:n], nil
}
// MarshalTo marshals m to dAtA.
func (m *Sample) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
// MarshalToSizedBuffer marshals m to dAtA.
func (m *Sample) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
if m.Timestamp != 0 {
@@ -136,6 +149,7 @@ func (m *Sample) MarshalToSizedBuffer(dAtA []byte) (int, error) {
return len(dAtA) - i, nil
}
// Marshal marshals m.
func (m *TimeSeries) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@@ -146,11 +160,13 @@ func (m *TimeSeries) Marshal() (dAtA []byte, err error) {
return dAtA[:n], nil
}
// MarshalTo marshals m to dAtA.
func (m *TimeSeries) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
// MarshalToSizedBuffer marshals m to dAtA.
func (m *TimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
if len(m.Samples) > 0 {
@@ -184,6 +200,7 @@ func (m *TimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) {
return len(dAtA) - i, nil
}
// Marshal marshals m.
func (m *Label) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@@ -194,11 +211,13 @@ func (m *Label) Marshal() (dAtA []byte, err error) {
return dAtA[:n], nil
}
// MarshalTo marshals m to dAtA.
func (m *Label) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
// MarshalToSizedBuffer marshals m to dAtA.
func (m *Label) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
_ = i
@@ -221,6 +240,7 @@ func (m *Label) MarshalToSizedBuffer(dAtA []byte) (int, error) {
return len(dAtA) - i, nil
}
// Marshal marshals m.
func (m *Labels) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@@ -231,11 +251,13 @@ func (m *Labels) Marshal() (dAtA []byte, err error) {
return dAtA[:n], nil
}
// MarshalTo marshals m to dAtA.
func (m *Labels) MarshalTo(dAtA []byte) (int, error) {
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
// MarshalToSizedBuffer marshals m to dAtA.
func (m *Labels) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
if len(m.Labels) > 0 {
@@ -267,6 +289,7 @@ func encodeVarintTypes(dAtA []byte, offset int, v uint64) int {
return base
}
// Size returns the size of marshaled m.
func (m *Sample) Size() (n int) {
if m == nil {
return 0
@@ -280,6 +303,7 @@ func (m *Sample) Size() (n int) {
return n
}
// Size returns the size of marshaled m.
func (m *TimeSeries) Size() (n int) {
if m == nil {
return 0
@@ -301,6 +325,7 @@ func (m *TimeSeries) Size() (n int) {
return n
}
// Size returns the size of marshaled m.
func (m *Label) Size() (n int) {
if m == nil {
return 0
@@ -318,6 +343,7 @@ func (m *Label) Size() (n int) {
return n
}
// Size returns the size of marshaled m.
func (m *Labels) Size() (n int) {
if m == nil {
return 0

View File

@@ -1,9 +1,8 @@
// +build integration
package test
import "github.com/golang/snappy"
// Compress marshals and compresses wr.
func Compress(wr WriteRequest) ([]byte, error) {
data, err := wr.Marshal()
if err != nil {

View File

@@ -13,11 +13,8 @@
"data":{"resultType":"matrix",
"result":[{"metric":{"__name__":"max_lookback_set"},"values":[
["{TIME_S-150s}","4"],
["{TIME_S-140s}","4"],
["{TIME_S-120s}","3"],
["{TIME_S-110s}","3"],
["{TIME_S-60s}","2"],
["{TIME_S-50s}","2"],
["{TIME_S-30s}","1"],
["{TIME_S-20s}","1"]
]}]}}

View File

@@ -19,14 +19,12 @@
["{TIME_S-110s}","3"],
["{TIME_S-100s}","3"],
["{TIME_S-90s}","3"],
["{TIME_S-80s}","3"],
["{TIME_S-70s}","3"],
["{TIME_S-60s}","2"],
["{TIME_S-50s}","2"],
["{TIME_S-40s}","2"],
["{TIME_S-30s}","1"],
["{TIME_S-20s}","1"],
["{TIME_S-10s}","1"],
["{TIME_S}","1"]
["{TIME_S-0s}","1"]
]}]}}
}

73
app/vmagent/Makefile Normal file
View File

@@ -0,0 +1,73 @@
# All these commands must run from repository root.
vmagent:
APP_NAME=vmagent $(MAKE) app-local
vmagent-prod:
APP_NAME=vmagent $(MAKE) app-via-docker
vmagent-pure-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-pure
vmagent-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-amd64
vmagent-arm-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-arm
vmagent-arm64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-arm64
vmagent-ppc64le-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-ppc64le
vmagent-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-386
package-vmagent:
APP_NAME=vmagent $(MAKE) package-via-docker
package-vmagent-pure:
APP_NAME=vmagent $(MAKE) package-via-docker-pure
package-vmagent-amd64:
APP_NAME=vmagent $(MAKE) package-via-docker-amd64
package-vmagent-arm:
APP_NAME=vmagent $(MAKE) package-via-docker-arm
package-vmagent-arm64:
APP_NAME=vmagent $(MAKE) package-via-docker-arm64
package-vmagent-ppc64le:
APP_NAME=vmagent $(MAKE) package-via-docker-ppc64le
package-vmagent-386:
APP_NAME=vmagent $(MAKE) package-via-docker-386
publish-vmagent:
APP_NAME=vmagent $(MAKE) publish-via-docker
run-vmagent:
mkdir -p vmagent-data
DOCKER_OPTS='-v $(shell pwd)/vmagent-data:/vmagent-data' \
APP_NAME=vmagent \
$(MAKE) run-via-docker
vmagent-amd64:
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-amd64 ./app/vmagent
vmagent-arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm ./app/vmagent
vmagent-arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-arm64 ./app/vmagent
vmagent-ppc64le:
CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-ppc64le ./app/vmagent
vmagent-386:
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmagent-386 ./app/vmagent
vmagent-pure:
APP_NAME=vmagent $(MAKE) app-local-pure

167
app/vmagent/README.md Normal file
View File

@@ -0,0 +1,167 @@
## vmagent
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
<img alt="vmagent" src="vmagent.png">
### Motivation
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
Also, we found that users infrastructure is like snowflakes - never alike, and we decided to add more flexibility
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
### Features
* Can be used as drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
See [Quick Start](#quick-start) for details.
* Can add, remove and modify labels via Prometheus relabeling. See [these docs](#relabeling) for details.
* Accepts data via all the ingestion protocols supported by VictoriaMetrics:
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data).
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-opentsdb-compatible-agents).
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
* Can replicate collected metrics simultaneously to multiple remote storage systems.
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
to remote storage is recovered. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth comparing to Prometheus.
### Quick Start
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. Multiple `-remoteWrite.url` args can be set in parallel
in order to replicate data concurrently to multiple remote storage systems.
Example command line:
```
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
If you need collecting only Influx data, then the following command line would be enough:
```
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
Then send Influx data to `http://vmagent-host:8429/write`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/).
Pass `-help` to `vmagent` in order to see the full list of supported command-line flags with their descriptions.
### How to collect metrics in Prometheus format?
Pass the path to `prometheus.yml` to `-promscrape.config` command-line flag. `vmagent` takes into account the following
sections from [Prometheus config file](https://prometheus.io/docs/prometheus/latest/configuration/configuration/):
* `global`
* `scrape_configs`
All the other sections are ignored, including [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section.
Use `-remoteWrite.*` command-line flags instead for configuring remote write settings.
The following scrape types in [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) section are supported:
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discover.
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
The following service discovery mechanisms will be added to `vmagent` soon:
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
### Adding labels to metrics
Labels can be added to metrics via the following mechanisms:
* Via `global -> external_labels` section in `-promscrape.config` file. These labels are added only to metrics scraped from targets configured in `-promscrape.config` file.
* Via `-remoteWrite.label` command-line flag. These labels are added to all the collected metrics before sending them to `-remoteWrite.url`.
### Relabeling
`vmagent` supports [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
Additionally it provides the following extra actions:
* `replace_all`: replaces all the occurences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
* `labelmap_all`: replaces all the occurences of `regex` in all the label names with the `replacement`.
The relabeling can be defined in the following places:
* At `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to targets when parsing the file during `vmagent` startup
or during config reload after sending `SIGHUP` signal to `vmagent` via `kill -HUP`.
* At `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to metrics after each scrape for the configured targets.
* At `-remoteWrite.relabelConfig` file. This relabeling is aplied to all the collected metrics before sending them to remote storage.
* At `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`.
Read more about relabeling in the following articles:
* [Life of a label](https://www.robustperception.io/life-of-a-label)
* [Discarding targets and timeseries with relabeling](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts)
* [Dropping labels at scrape time](https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus)
* [Extracting labels from legacy metric names](https://www.robustperception.io/extracting-labels-from-legacy-metric-names)
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
### Monitoring
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
### Troubleshooting
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
since `vmagent` establishes at least a single TCP connection per each target.
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
The directory can grow big when remote storage is unvailable during extended periods of time. If you don't want
sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
### How to build from sources
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in `vmutils-*` archives there.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make vmagent` from the root folder of the repository.
It builds `vmagent` binary and puts it into the `bin` folder.
#### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmagent-prod` from the root folder of the repository.
It builds `vmagent-prod` binary and puts it into the `bin` folder.
#### Building docker images
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.

View File

@@ -0,0 +1,70 @@
package common
import (
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// PushCtx is a context used for populating WriteRequest.
type PushCtx struct {
WriteRequest prompbmarshal.WriteRequest
// Labels contains flat list of all the labels used in WriteRequest.
Labels []prompbmarshal.Label
// Samples contains flat list of all the samples used in WriteRequest.
Samples []prompbmarshal.Sample
}
// Reset resets ctx.
func (ctx *PushCtx) Reset() {
tss := ctx.WriteRequest.Timeseries
for i := range tss {
ts := &tss[i]
ts.Labels = nil
ts.Samples = nil
}
ctx.WriteRequest.Timeseries = ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels
for i := range labels {
label := &labels[i]
label.Name = ""
label.Value = ""
}
ctx.Labels = ctx.Labels[:0]
ctx.Samples = ctx.Samples[:0]
}
// GetPushCtx returns PushCtx from pool.
//
// Call PutPushCtx when the ctx is no longer needed.
func GetPushCtx() *PushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*PushCtx)
}
return &PushCtx{}
}
}
// PutPushCtx returns ctx to the pool.
//
// ctx mustn't be used after returning to the pool.
func PutPushCtx(ctx *PushCtx) {
ctx.Reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *PushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -0,0 +1,8 @@
ARG certs_image
FROM $certs_image AS certs
FROM scratch
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
ARG src_binary
COPY $src_binary ./vmagent-prod
EXPOSE 8429
ENTRYPOINT ["/vmagent-prod"]

View File

@@ -0,0 +1,65 @@
package graphite
import (
"io"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="graphite"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="graphite"}`)
)
// InsertHandler processes remote write for graphite plaintext protocol.
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertRows(rows []parser.Row) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil
}

View File

@@ -0,0 +1,167 @@
package influx
import (
"flag"
"io"
"net/http"
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="influx"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="influx"}`)
)
// InsertHandlerForReader processes remote write for influx line protocol.
//
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
func InsertHandlerForReader(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, false, "", "", insertRows)
})
}
// InsertHandlerForHTTP processes remote write for influx line protocol.
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
func InsertHandlerForHTTP(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
return parser.ParseStream(req.Body, isGzipped, precision, db, insertRows)
})
}
func insertRows(db string, rows []parser.Row) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.ctx.WriteRequest.Timeseries[:0]
labels := ctx.ctx.Labels[:0]
samples := ctx.ctx.Samples[:0]
commonLabels := ctx.commonLabels[:0]
buf := ctx.buf[:0]
for i := range rows {
r := &rows[i]
commonLabels = commonLabels[:0]
hasDBLabel := false
for j := range r.Tags {
tag := &r.Tags[j]
if tag.Key == "db" {
hasDBLabel = true
}
commonLabels = append(commonLabels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
if len(db) > 0 && !hasDBLabel {
commonLabels = append(commonLabels, prompbmarshal.Label{
Name: "db",
Value: db,
})
}
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
}
for j := range r.Fields {
f := &r.Fields[j]
bufLen := len(buf)
buf = append(buf, ctx.metricGroupBuf...)
if !skipFieldKey {
buf = append(buf, f.Key...)
}
metricGroup := bytesutil.ToUnsafeString(buf[bufLen:])
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: metricGroup,
})
labels = append(labels, commonLabels...)
samples = append(samples, prompbmarshal.Sample{
Timestamp: r.Timestamp,
Value: f.Value,
})
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
}
rowsTotal += len(r.Fields)
}
ctx.buf = buf
ctx.ctx.WriteRequest.Timeseries = tssDst
ctx.ctx.Labels = labels
ctx.ctx.Samples = samples
ctx.commonLabels = commonLabels
remotewrite.Push(&ctx.ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return nil
}
type pushCtx struct {
ctx common.PushCtx
commonLabels []prompbmarshal.Label
metricGroupBuf []byte
buf []byte
}
func (ctx *pushCtx) reset() {
ctx.ctx.Reset()
commonLabels := ctx.commonLabels
for i := range commonLabels {
label := &commonLabels[i]
label.Name = ""
label.Value = ""
}
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
ctx.buf = ctx.buf[:0]
}
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

167
app/vmagent/main.go Normal file
View File

@@ -0,0 +1,167 @@
package main
import (
"flag"
"fmt"
"net/http"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
opentsdbhttpserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
)
var (
influxServer *influxserver.Server
graphiteServer *graphiteserver.Server
opentsdbServer *opentsdbserver.Server
opentsdbhttpServer *opentsdbhttpserver.Server
)
func main() {
envflag.Parse()
buildinfo.Init()
logger.Init()
logger.Infof("starting vmagent at %q...", *httpListenAddr)
startTime := time.Now()
remotewrite.Init()
writeconcurrencylimiter.Init()
if len(*influxListenAddr) > 0 {
influxServer = influxserver.MustStart(*influxListenAddr, influx.InsertHandlerForReader)
}
if len(*graphiteListenAddr) > 0 {
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
}
if len(*opentsdbListenAddr) > 0 {
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, opentsdbhttp.InsertHandler)
}
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, opentsdbhttp.InsertHandler)
}
promscrape.Init(remotewrite.Push)
if len(*httpListenAddr) > 0 {
go httpserver.Serve(*httpListenAddr, requestHandler)
}
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
sig := procutil.WaitForSigterm()
logger.Infof("received signal %s", sig)
startTime = time.Now()
if len(*httpListenAddr) > 0 {
logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
}
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
}
promscrape.Stop()
if len(*influxListenAddr) > 0 {
influxServer.MustStop()
}
if len(*graphiteListenAddr) > 0 {
graphiteServer.MustStop()
}
if len(*opentsdbListenAddr) > 0 {
opentsdbServer.MustStop()
}
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttpServer.MustStop()
}
remotewrite.Stop()
logger.Infof("successfully stopped vmagent in %.3f seconds", time.Since(startTime).Seconds())
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
path := strings.Replace(r.URL.Path, "//", "/", -1)
switch path {
case "/api/v1/write":
prometheusWriteRequests.Inc()
if err := promremotewrite.InsertHandler(r); err != nil {
prometheusWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/api/v1/import":
vmimportRequests.Inc()
if err := vmimport.InsertHandler(r); err != nil {
vmimportErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/write", "/api/v2/write":
influxWriteRequests.Inc()
if err := influx.InsertHandlerForHTTP(r); err != nil {
influxWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/query":
// Emulate fake response for influx query.
// This is required for TSBS benchmark.
influxQueryRequests.Inc()
fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
return true
case "/targets":
promscrapeTargetsRequests.Inc()
w.Header().Set("Content-Type", "text/plain")
promscrape.WriteHumanReadableTargetsStatus(w)
return true
}
return false
}
var (
prometheusWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/write", protocol="prometheus"}`)
prometheusWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/write", protocol="prometheus"}`)
vmimportRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/import", protocol="vm"}`)
vmimportErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/api/v1/import", protocol="vm"}`)
influxWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/write", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/query", protocol="influx"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
)

View File

@@ -0,0 +1,65 @@
package opentsdb
import (
"io"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="opentsdb"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentsdb"}`)
)
// InsertHandler processes remote write for OpenTSDB put protocol.
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertRows(rows []parser.Row) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil
}

View File

@@ -0,0 +1,64 @@
package opentsdbhttp
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="opentsdbhttp"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentsdbhttp"}`)
)
// InsertHandler processes HTTP OpenTSDB put requests.
// See http://opentsdb.net/docs/build/html/api_http/put.html
func InsertHandler(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
}
func insertRows(rows []parser.Row) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: r.Metric,
})
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompbmarshal.Label{
Name: tag.Key,
Value: tag.Value,
})
}
samples = append(samples, prompbmarshal.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return nil
}

View File

@@ -0,0 +1,67 @@
package promremotewrite
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="promremotewrite"}`)
)
// InsertHandler processes remote write for prometheus.
func InsertHandler(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
}
func insertRows(timeseries []prompb.TimeSeries) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range timeseries {
ts := &timeseries[i]
labelsLen := len(labels)
for i := range ts.Labels {
label := &ts.Labels[i]
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(label.Name),
Value: bytesutil.ToUnsafeString(label.Value),
})
}
samplesLen := len(samples)
for i := range ts.Samples {
sample := &ts.Samples[i]
samples = append(samples, prompbmarshal.Sample{
Value: sample.Value,
Timestamp: sample.Timestamp,
})
}
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
rowsTotal += len(ts.Samples)
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -0,0 +1,267 @@
package remotewrite
import (
"crypto/tls"
"crypto/x509"
"encoding/base64"
"flag"
"fmt"
"io/ioutil"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fasthttp"
)
var (
sendTimeout = flag.Duration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to -remoteWrite.url")
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
tlsKeyFile = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
tlsCAFile = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
"By default system CA is used")
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username to use for -remoteWrite.url")
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password to use for -remoteWrite.url")
bearerToken = flag.String("remoteWrite.bearerToken", "", "Optional bearer auth token to use for -remoteWrite.url")
)
type client struct {
urlLabelValue string
remoteWriteURL string
host string
requestURI string
authHeader string
fq *persistentqueue.FastQueue
hc *fasthttp.HostClient
requestDuration *metrics.Histogram
requestsOKCount *metrics.Counter
errorsCount *metrics.Counter
retriesCount *metrics.Counter
wg sync.WaitGroup
stopCh chan struct{}
}
func newClient(remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQueue, concurrency int) *client {
authHeader := ""
if len(*basicAuthUsername) > 0 || len(*basicAuthPassword) > 0 {
// See https://en.wikipedia.org/wiki/Basic_access_authentication
token := *basicAuthUsername + ":" + *basicAuthPassword
token64 := base64.StdEncoding.EncodeToString([]byte(token))
authHeader = "Basic " + token64
}
if len(*bearerToken) > 0 {
if authHeader != "" {
logger.Panicf("FATAL: `-remoteWrite.bearerToken`=%q cannot be set when `-remoteWrite.basicAuth.*` flags are set", *bearerToken)
}
authHeader = "Bearer " + *bearerToken
}
readTimeout := *sendTimeout
if readTimeout <= 0 {
readTimeout = time.Minute
}
var u fasthttp.URI
u.Update(remoteWriteURL)
scheme := string(u.Scheme())
switch scheme {
case "http", "https":
default:
logger.Panicf("FATAL: unsupported scheme in -remoteWrite.url=%q: %q. It must be http or https", remoteWriteURL, scheme)
}
host := string(u.Host())
if len(host) == 0 {
logger.Panicf("FATAL: invalid -remoteWrite.url=%q: host cannot be empty. Make sure the url looks like `http://host:port/path`", remoteWriteURL)
}
requestURI := string(u.RequestURI())
isTLS := scheme == "https"
var tlsCfg *tls.Config
if isTLS {
var err error
tlsCfg, err = getTLSConfig()
if err != nil {
logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
}
}
if !strings.Contains(host, ":") {
if isTLS {
host += ":443"
} else {
host += ":80"
}
}
maxConns := 2 * concurrency
hc := &fasthttp.HostClient{
Addr: host,
Name: "vmagent",
Dial: statDial,
DialDualStack: netutil.TCP6Enabled(),
IsTLS: isTLS,
TLSConfig: tlsCfg,
MaxConns: maxConns,
MaxIdleConnDuration: 10 * readTimeout,
ReadTimeout: readTimeout,
WriteTimeout: 10 * time.Second,
MaxResponseBodySize: 1024 * 1024,
}
c := &client{
urlLabelValue: urlLabelValue,
remoteWriteURL: remoteWriteURL,
host: host,
requestURI: requestURI,
authHeader: authHeader,
fq: fq,
hc: hc,
stopCh: make(chan struct{}),
}
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.urlLabelValue))
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.urlLabelValue))
c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.urlLabelValue))
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.urlLabelValue))
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.remoteWriteURL)
return c
}
func (c *client) MustStop() {
close(c.stopCh)
c.wg.Wait()
logger.Infof("stopped client for -remoteWrite.url=%q", c.remoteWriteURL)
}
func getTLSConfig() (*tls.Config, error) {
var tlsRootCA *x509.CertPool
var tlsCertificate *tls.Certificate
if *tlsCertFile != "" || *tlsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(*tlsCertFile, *tlsKeyFile)
if err != nil {
return nil, fmt.Errorf("cannot load TLS certificate for -remoteWrite.tlsCertFile=%q and -remoteWrite.tlsKeyFile=%q: %s", *tlsCertFile, *tlsKeyFile, err)
}
tlsCertificate = &cert
}
if *tlsCAFile != "" {
data, err := ioutil.ReadFile(*tlsCAFile)
if err != nil {
return nil, fmt.Errorf("cannot read -remoteWrite.tlsCAFile=%q: %s", *tlsCAFile, err)
}
tlsRootCA = x509.NewCertPool()
if !tlsRootCA.AppendCertsFromPEM(data) {
return nil, fmt.Errorf("cannot parse data -remoteWrite.tlsCAFile=%q", *tlsCAFile)
}
}
tlsCfg := &tls.Config{
RootCAs: tlsRootCA,
ClientSessionCache: tls.NewLRUClientSessionCache(0),
}
if tlsCertificate != nil {
tlsCfg.Certificates = []tls.Certificate{*tlsCertificate}
}
tlsCfg.InsecureSkipVerify = *tlsInsecureSkipVerify
return tlsCfg, nil
}
func (c *client) runWorker() {
var ok bool
var block []byte
ch := make(chan struct{})
for {
block, ok = c.fq.MustReadBlock(block[:0])
if !ok {
return
}
go func() {
c.sendBlock(block)
ch <- struct{}{}
}()
select {
case <-ch:
// The block has been sent successfully
continue
case <-c.stopCh:
// c must be stopped. Wait for a while in the hope the block will be sent.
graceDuration := 5 * time.Second
select {
case <-ch:
// The block has been sent successfully.
case <-time.After(graceDuration):
logger.Errorf("couldn't sent block with size %d bytes to %q in %.3f seconds during shutdown; dropping it",
len(block), c.remoteWriteURL, graceDuration.Seconds())
}
return
}
}
}
func (c *client) sendBlock(block []byte) {
req := fasthttp.AcquireRequest()
req.SetRequestURI(c.requestURI)
req.SetHost(c.host)
req.Header.SetMethod("POST")
req.Header.Add("Content-Type", "application/x-protobuf")
req.Header.Add("Content-Encoding", "snappy")
if c.authHeader != "" {
req.Header.Set("Authorization", c.authHeader)
}
req.SetBody(block)
retryDuration := time.Second
resp := fasthttp.AcquireResponse()
again:
select {
case <-c.stopCh:
fasthttp.ReleaseRequest(req)
fasthttp.ReleaseResponse(resp)
return
default:
}
startTime := time.Now()
// There is no need in calling DoTimeout, since the timeout is set in c.hc.ReadTimeout.
err := c.hc.Do(req, resp)
c.requestDuration.UpdateDuration(startTime)
if err != nil {
c.errorsCount.Inc()
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
}
logger.Errorf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.remoteWriteURL, err, retryDuration.Seconds())
time.Sleep(retryDuration)
c.retriesCount.Inc()
goto again
}
statusCode := resp.StatusCode()
if statusCode/100 != 2 {
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.urlLabelValue, statusCode)).Inc()
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
}
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q: %d; response body=%q; re-sending the block in %.3f seconds",
len(block), c.remoteWriteURL, statusCode, resp.Body(), retryDuration.Seconds())
time.Sleep(retryDuration)
c.retriesCount.Inc()
goto again
}
c.requestsOKCount.Inc()
// The block has been successfully sent to the remote storage.
fasthttp.ReleaseResponse(resp)
fasthttp.ReleaseRequest(req)
}

View File

@@ -0,0 +1,199 @@
package remotewrite
import (
"flag"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
var (
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage")
maxUnpackedBlockSize = flag.Int("remoteWrite.maxBlockSize", 32*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
)
// the maximum number of rows to send per each block.
const maxRowsPerBlock = 10000
type pendingSeries struct {
mu sync.Mutex
wr writeRequest
stopCh chan struct{}
periodicFlusherWG sync.WaitGroup
}
func newPendingSeries(pushBlock func(block []byte)) *pendingSeries {
var ps pendingSeries
ps.wr.pushBlock = pushBlock
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps
}
func (ps *pendingSeries) MustStop() {
close(ps.stopCh)
ps.periodicFlusherWG.Wait()
}
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
ps.mu.Lock()
ps.wr.push(tss)
ps.mu.Unlock()
}
func (ps *pendingSeries) periodicFlusher() {
ticker := time.NewTicker(*flushInterval)
defer ticker.Stop()
mustStop := false
for !mustStop {
select {
case <-ps.stopCh:
mustStop = true
case <-ticker.C:
if time.Since(ps.wr.lastFlushTime) < *flushInterval/2 {
continue
}
}
ps.mu.Lock()
ps.wr.flush()
ps.mu.Unlock()
}
}
type writeRequest struct {
wr prompbmarshal.WriteRequest
pushBlock func(block []byte)
lastFlushTime time.Time
tss []prompbmarshal.TimeSeries
labels []prompbmarshal.Label
samples []prompbmarshal.Sample
buf []byte
}
func (wr *writeRequest) reset() {
wr.wr.Timeseries = nil
for i := range wr.tss {
ts := &wr.tss[i]
ts.Labels = nil
ts.Samples = nil
}
wr.tss = wr.tss[:0]
for i := range wr.labels {
label := &wr.labels[i]
label.Name = ""
label.Value = ""
}
wr.labels = wr.labels[:0]
wr.samples = wr.samples[:0]
wr.buf = wr.buf[:0]
}
func (wr *writeRequest) flush() {
wr.wr.Timeseries = wr.tss
wr.lastFlushTime = time.Now()
pushWriteRequest(&wr.wr, wr.pushBlock)
wr.reset()
}
func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
tssDst := wr.tss
for i := range src {
tssDst = append(tssDst, prompbmarshal.TimeSeries{})
dst := &tssDst[len(tssDst)-1]
wr.copyTimeSeries(dst, &src[i])
if len(wr.tss) >= maxRowsPerBlock {
wr.flush()
tssDst = wr.tss
}
}
wr.tss = tssDst
}
func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
labelsDst := wr.labels
labelsLen := len(wr.labels)
samplesDst := wr.samples
buf := wr.buf
for i := range src.Labels {
labelsDst = append(labelsDst, prompbmarshal.Label{})
dstLabel := &labelsDst[len(labelsDst)-1]
srcLabel := &src.Labels[i]
buf = append(buf, srcLabel.Name...)
dstLabel.Name = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Name):])
buf = append(buf, srcLabel.Value...)
dstLabel.Value = bytesutil.ToUnsafeString(buf[len(buf)-len(srcLabel.Value):])
}
dst.Labels = labelsDst[labelsLen:]
samplesDst = append(samplesDst, prompbmarshal.Sample{})
dstSample := &samplesDst[len(samplesDst)-1]
if len(src.Samples) != 1 {
logger.Panicf("BUG: unexpected number of samples in time series; got %d; want 1", len(src.Samples))
}
*dstSample = src.Samples[0]
dst.Samples = samplesDst[len(samplesDst)-1:]
wr.samples = samplesDst
wr.labels = labelsDst
wr.buf = buf
}
func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte)) {
if len(wr.Timeseries) == 0 {
// Nothing to push
return
}
bb := writeRequestBufPool.Get()
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
if len(bb.B) <= *maxUnpackedBlockSize {
zb := snappyBufPool.Get()
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
writeRequestBufPool.Put(bb)
if len(zb.B) <= persistentqueue.MaxBlockSize {
pushBlock(zb.B)
blockSizeRows.Update(float64(len(wr.Timeseries)))
blockSizeBytes.Update(float64(len(zb.B)))
snappyBufPool.Put(zb)
return
}
snappyBufPool.Put(zb)
} else {
writeRequestBufPool.Put(bb)
}
// Too big block. Recursively split it into smaller parts.
timeseries := wr.Timeseries
n := len(timeseries) / 2
wr.Timeseries = timeseries[:n]
pushWriteRequest(wr, pushBlock)
wr.Timeseries = timeseries[n:]
pushWriteRequest(wr, pushBlock)
wr.Timeseries = timeseries
}
var (
blockSizeBytes = metrics.NewHistogram(`vmagent_remotewrite_block_size_bytes`)
blockSizeRows = metrics.NewHistogram(`vmagent_remotewrite_block_size_rows`)
)
var writeRequestBufPool bytesutil.ByteBufferPool
var snappyBufPool bytesutil.ByteBufferPool

View File

@@ -0,0 +1,113 @@
package remotewrite
import (
"flag"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
var (
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple flags to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. These entries are applied to all the metrics "+
"before sending them to -remoteWrite.url. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config for details")
)
var labelsGlobal []prompbmarshal.Label
var prcsGlobal []promrelabel.ParsedRelabelConfig
// initRelabelGlobal must be called after parsing command-line flags.
func initRelabelGlobal() {
// Init labelsGlobal
labelsGlobal = nil
for _, s := range *unparsedLabelsGlobal {
n := strings.IndexByte(s, '=')
if n < 0 {
logger.Panicf("FATAL: missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
}
labelsGlobal = append(labelsGlobal, prompbmarshal.Label{
Name: s[:n],
Value: s[n+1:],
})
}
// Init prcsGlobal
prcsGlobal = nil
if len(*relabelConfigPathGlobal) > 0 {
var err error
prcsGlobal, err = promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil {
logger.Panicf("FATAL: cannot load relabel configs from -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
}
}
}
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, prcs []promrelabel.ParsedRelabelConfig) []prompbmarshal.TimeSeries {
if len(extraLabels) == 0 && len(prcs) == 0 {
// Nothing to change.
return tss
}
tssDst := tss[:0]
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
// extraLabels must be added before applying relabeling according to https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
for j := range extraLabels {
extraLabel := &extraLabels[j]
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
if tmp != nil {
tmp.Value = extraLabel.Value
} else {
labels = append(labels, *extraLabel)
}
}
labels = promrelabel.ApplyRelabelConfigs(labels, labelsLen, prcs, true)
if len(labels) == labelsLen {
// Drop the current time series, since relabeling removed all the labels.
continue
}
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: ts.Samples,
})
}
rctx.labels = labels
return tssDst
}
type relabelCtx struct {
// pool for labels, which are used during the relabeling.
labels []prompbmarshal.Label
}
func (rctx *relabelCtx) reset() {
labels := rctx.labels
for i := range labels {
label := &labels[i]
label.Name = ""
label.Value = ""
}
rctx.labels = rctx.labels[:0]
}
var relabelCtxPool = &sync.Pool{
New: func() interface{} {
return &relabelCtx{}
},
}
func getRelabelCtx() *relabelCtx {
return relabelCtxPool.Get().(*relabelCtx)
}
func putRelabelCtx(rctx *relabelCtx) {
rctx.labels = rctx.labels[:0]
relabelCtxPool.Put(rctx)
}

View File

@@ -0,0 +1,195 @@
package remotewrite
import (
"flag"
"fmt"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/metrics"
xxhash "github.com/cespare/xxhash/v2"
)
var (
remoteWriteURLs = flagutil.NewArray("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
"Pass multiple -remoteWrite.url flags in order to write data concurrently to multiple remote storage systems")
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored")
queues = flag.Int("remoteWrite.queues", 1, "The number of concurrent queues to each -remoteWrite.url. Set more queues if a single queue "+
"isn't enough for sending high volume of collected data to remote storage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
"It is hidden by default, since it can contain sensistive auth info")
maxPendingBytesPerURL = flag.Int("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. "+
"Disk usage is unlimited if the value is set to 0")
)
var rwctxs []*remoteWriteCtx
// Init initializes remotewrite.
//
// It must be called after flag.Parse().
//
// Stop must be called for graceful shutdown.
func Init() {
if len(*remoteWriteURLs) == 0 {
logger.Panicf("FATAL: at least one `-remoteWrite.url` must be set")
}
if !*showRemoteWriteURL {
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
httpserver.RegisterSecretFlag("remoteWrite.url")
}
initRelabelGlobal()
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / maxRowsPerBlock / 100
if maxInmemoryBlocks > 200 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 200
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
for i, remoteWriteURL := range *remoteWriteURLs {
relabelConfigPath := ""
if i < len(*relabelConfigPaths) {
relabelConfigPath = (*relabelConfigPaths)[i]
}
urlLabelValue := fmt.Sprintf("secret-url-%d", i+1)
if *showRemoteWriteURL {
urlLabelValue = remoteWriteURL
}
rwctx := newRemoteWriteCtx(remoteWriteURL, relabelConfigPath, maxInmemoryBlocks, urlLabelValue)
rwctxs = append(rwctxs, rwctx)
}
}
// Stop stops remotewrite.
//
// It is expected that nobody calls Push during and after the call to this func.
func Stop() {
for _, rwctx := range rwctxs {
rwctx.MustStop()
}
rwctxs = nil
}
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
//
// Each timeseries in wr.Timeseries must contain one sample.
func Push(wr *prompbmarshal.WriteRequest) {
var rctx *relabelCtx
if len(prcsGlobal) > 0 {
rctx = getRelabelCtx()
}
tss := wr.Timeseries
for len(tss) > 0 {
// Process big tss in smaller blocks in order to reduce maxmimum memory usage
tssBlock := tss
if len(tssBlock) > maxRowsPerBlock {
tssBlock = tss[:maxRowsPerBlock]
tss = tss[maxRowsPerBlock:]
} else {
tss = nil
}
if rctx != nil {
tssBlockLen := len(tssBlock)
tssBlock = rctx.applyRelabeling(tssBlock, labelsGlobal, prcsGlobal)
globalRelabelMetricsDropped.Add(tssBlockLen - len(tssBlock))
}
for _, rwctx := range rwctxs {
rwctx.Push(tssBlock)
}
if rctx != nil {
rctx.reset()
}
}
if rctx != nil {
putRelabelCtx(rctx)
}
}
var globalRelabelMetricsDropped = metrics.NewCounter("vmagent_remotewrite_global_relabel_metrics_dropped_total")
type remoteWriteCtx struct {
fq *persistentqueue.FastQueue
c *client
prcs []promrelabel.ParsedRelabelConfig
pss []*pendingSeries
pssNextIdx uint64
relabelMetricsDropped *metrics.Counter
}
func newRemoteWriteCtx(remoteWriteURL, relabelConfigPath string, maxInmemoryBlocks int, urlLabelValue string) *remoteWriteCtx {
h := xxhash.Sum64([]byte(remoteWriteURL))
path := fmt.Sprintf("%s/persistent-queue/%016X", *tmpDataPath, h)
fq := persistentqueue.MustOpenFastQueue(path, remoteWriteURL, maxInmemoryBlocks, *maxPendingBytesPerURL)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, path, urlLabelValue), func() float64 {
return float64(fq.GetPendingBytes())
})
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, path, urlLabelValue), func() float64 {
return float64(fq.GetInmemoryQueueLen())
})
c := newClient(remoteWriteURL, urlLabelValue, fq, *queues)
var prcs []promrelabel.ParsedRelabelConfig
if len(relabelConfigPath) > 0 {
var err error
prcs, err = promrelabel.LoadRelabelConfigs(relabelConfigPath)
if err != nil {
logger.Panicf("FATAL: cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", relabelConfigPath, err)
}
}
pss := make([]*pendingSeries, *queues)
for i := range pss {
pss[i] = newPendingSeries(fq.MustWriteBlock)
}
return &remoteWriteCtx{
fq: fq,
c: c,
prcs: prcs,
pss: pss,
relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, path, urlLabelValue)),
}
}
func (rwctx *remoteWriteCtx) MustStop() {
for _, ps := range rwctx.pss {
ps.MustStop()
}
rwctx.pss = nil
rwctx.fq.MustClose()
rwctx.fq = nil
rwctx.prcs = nil
rwctx.c.MustStop()
rwctx.c = nil
rwctx.relabelMetricsDropped = nil
}
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
var rctx *relabelCtx
if len(rwctx.prcs) > 0 {
rctx = getRelabelCtx()
tssLen := len(tss)
tss = rctx.applyRelabeling(tss, nil, rwctx.prcs)
rwctx.relabelMetricsDropped.Add(tssLen - len(tss))
}
pss := rwctx.pss
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
pss[idx].Push(tss)
if rctx != nil {
putRelabelCtx(rctx)
}
}

View File

@@ -0,0 +1,71 @@
package remotewrite
import (
"net"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fasthttp"
)
func statDial(addr string) (net.Conn, error) {
conn, err := fasthttp.Dial(addr)
dialsTotal.Inc()
if err != nil {
dialErrors.Inc()
return nil, err
}
conns.Inc()
sc := &statConn{
Conn: conn,
}
return sc, nil
}
var (
dialsTotal = metrics.NewCounter(`vmagent_remotewrite_dials_total`)
dialErrors = metrics.NewCounter(`vmagent_remotewrite_dial_errors_total`)
conns = metrics.NewCounter(`vmagent_remotewrite_conns`)
)
type statConn struct {
closed uint64
net.Conn
}
func (sc *statConn) Read(p []byte) (int, error) {
n, err := sc.Conn.Read(p)
connReadsTotal.Inc()
if err != nil {
connReadErrors.Inc()
}
connBytesRead.Add(n)
return n, err
}
func (sc *statConn) Write(p []byte) (int, error) {
n, err := sc.Conn.Write(p)
connWritesTotal.Inc()
if err != nil {
connWriteErrors.Inc()
}
connBytesWritten.Add(n)
return n, err
}
func (sc *statConn) Close() error {
err := sc.Conn.Close()
if atomic.AddUint64(&sc.closed, 1) == 1 {
conns.Dec()
}
return err
}
var (
connReadsTotal = metrics.NewCounter(`vmagent_remotewrite_conn_reads_total`)
connWritesTotal = metrics.NewCounter(`vmagent_remotewrite_conn_writes_total`)
connReadErrors = metrics.NewCounter(`vmagent_remotewrite_conn_read_errors_total`)
connWriteErrors = metrics.NewCounter(`vmagent_remotewrite_conn_write_errors_total`)
connBytesRead = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_read_total`)
connBytesWritten = metrics.NewCounter(`vmagent_remotewrite_conn_bytes_written_total`)
)

BIN
app/vmagent/vmagent.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

View File

@@ -0,0 +1,70 @@
package vmimport
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="vmimport"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="vmimport"}`)
)
// InsertHandler processes `/api/v1/import` request.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
func InsertHandler(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
}
func insertRows(rows []parser.Row) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
values := r.Values
timestamps := r.Timestamps
_ = timestamps[len(values)-1]
samplesLen := len(samples)
for j, value := range values {
samples = append(samples, prompbmarshal.Sample{
Value: value,
Timestamp: timestamps[j],
})
}
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
rowsTotal += len(values)
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(&ctx.WriteRequest)
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

41
app/vmalert/README.md Normal file
View File

@@ -0,0 +1,41 @@
## VM Alert
#### Abstract
The application which accepts the alert rules, executes them on given source, sends(fires) an alert to(in) alert management system
### Components
#### Alert Config Reader
It accepts yaml config as input parameter in Prometheus format, parses it into Go struct.
#### Source Caller
Create own watchdog for every alert group (goroutines), which executes alert query on given source and issues an alert if source returns non-empty result.
Source can be any service which supports PromQL (MetricsQL).
#### Alert Management System Provider
Send positive alert to alert management system, provides interface for every concrete implementation.
Should be ingratiated with Prometheus alertmanager.
open questions:
- do we really need alert group or can just run every alert in own goroutine?
#### Web Server
Expose metrics
open questions:
- should the tool provide API or UI for managing alerting rules? Where to store config updated via the API or UI?
- should the tool provide “alerting rules validation mode” for validating and debugging alerting rules? This mode is useful when creating and debugging alerting rules.
#### Requirements:
- Stateless
- Avoid external dependencies if possible
- Reuse existing code from VictoriaMetrics repo
- Makefile rules for common tasks see Makefiles for other apps in the app/ dir
- Every package should be covered by tests
- Dockerfile
- Graceful shutdown
- Helm template
- Application uses command line flags for configuration
<img alt="VM Alert" src="vmalert.png">

View File

@@ -0,0 +1,32 @@
package config
import "time"
// Annotations basic annotation for alert rule
type Annotations struct {
Summary string
Description string
}
// Alert basic alert entity rule
type Alert struct {
Name string
Expr string
For time.Duration
Labels map[string]string
Annotations Annotations
Start time.Time
End time.Time
}
// Group grouping array of alert
type Group struct {
Name string
Rules []Alert
}
// Parse parses config from given file
func Parse(filepath string) ([]Group, error) {
return []Group{}, nil
}

View File

@@ -0,0 +1,14 @@
package datasource
import "context"
// Metrics the data returns from storage
type Metrics struct{}
// VMStorage represents vmstorage entity with ability to read and write metrics
type VMStorage struct{}
//Query basic query to the datasource
func (s *VMStorage) Query(ctx context.Context, query string) ([]Metrics, error) {
return nil, nil
}

60
app/vmalert/main.go Normal file
View File

@@ -0,0 +1,60 @@
package main
import (
"flag"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
)
var (
configPath = flag.String("config", "config.yaml", "Path to alert configuration file")
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
)
func main() {
envflag.Parse()
buildinfo.Init()
logger.Init()
logger.Infof("reading alert rules configuration file from %s", *configPath)
alertGroups, err := config.Parse(*configPath)
if err != nil {
logger.Fatalf("Cannot parse configuration file %s", err)
}
w := &watchdog{storage: &datasource.VMStorage{}}
for id := range alertGroups {
go func(group config.Group) {
w.run(group)
}(alertGroups[id])
}
go func() {
httpserver.Serve(*httpListenAddr, func(w http.ResponseWriter, r *http.Request) bool {
panic("not implemented")
})
}()
sig := procutil.WaitForSigterm()
logger.Infof("service received signal %s", sig)
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the webservice: %s", err)
}
w.stop()
}
type watchdog struct {
storage *datasource.VMStorage
}
func (w *watchdog) run(a config.Group) {
}
func (w *watchdog) stop() {
panic("not implemented")
}

View File

@@ -0,0 +1,26 @@
{% import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
) %}
{% stripspace %}
{% func amRequest(alert *config.Alert, generatorURL string) %}
{
"startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
"generatorURL": {%q= generatorURL %},
{% if !alert.End.IsZero() %}
"endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
{% endif %}
"labels": {
"alertname":{%q= alert.Name %}
{% for k,v := range alert.Labels %}
,{%q= k %}:{%q= v %}
{% endfor %}
},
"annotations": {
"summary": {%q= alert.Annotations.Summary %},
"description": {%q= alert.Annotations.Description %}
}
}
{% endfunc %}
{% endstripspace %}

View File

@@ -0,0 +1,101 @@
// Code generated by qtc from "alert_manager_request.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmalert/provider/alert_manager_request.qtpl:1
package provider
//line app/vmalert/provider/alert_manager_request.qtpl:1
import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"time"
)
//line app/vmalert/provider/alert_manager_request.qtpl:7
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmalert/provider/alert_manager_request.qtpl:7
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmalert/provider/alert_manager_request.qtpl:7
func streamamRequest(qw422016 *qt422016.Writer, alert *config.Alert, generatorURL string) {
//line app/vmalert/provider/alert_manager_request.qtpl:7
qw422016.N().S(`{"startsAt":`)
//line app/vmalert/provider/alert_manager_request.qtpl:9
qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
//line app/vmalert/provider/alert_manager_request.qtpl:9
qw422016.N().S(`,"generatorURL":`)
//line app/vmalert/provider/alert_manager_request.qtpl:10
qw422016.N().Q(generatorURL)
//line app/vmalert/provider/alert_manager_request.qtpl:10
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:11
if !alert.End.IsZero() {
//line app/vmalert/provider/alert_manager_request.qtpl:11
qw422016.N().S(`"endsAt":`)
//line app/vmalert/provider/alert_manager_request.qtpl:12
qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
//line app/vmalert/provider/alert_manager_request.qtpl:12
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:13
}
//line app/vmalert/provider/alert_manager_request.qtpl:13
qw422016.N().S(`"labels": {"alertname":`)
//line app/vmalert/provider/alert_manager_request.qtpl:15
qw422016.N().Q(alert.Name)
//line app/vmalert/provider/alert_manager_request.qtpl:16
for k, v := range alert.Labels {
//line app/vmalert/provider/alert_manager_request.qtpl:16
qw422016.N().S(`,`)
//line app/vmalert/provider/alert_manager_request.qtpl:17
qw422016.N().Q(k)
//line app/vmalert/provider/alert_manager_request.qtpl:17
qw422016.N().S(`:`)
//line app/vmalert/provider/alert_manager_request.qtpl:17
qw422016.N().Q(v)
//line app/vmalert/provider/alert_manager_request.qtpl:18
}
//line app/vmalert/provider/alert_manager_request.qtpl:18
qw422016.N().S(`},"annotations": {"summary":`)
//line app/vmalert/provider/alert_manager_request.qtpl:21
qw422016.N().Q(alert.Annotations.Summary)
//line app/vmalert/provider/alert_manager_request.qtpl:21
qw422016.N().S(`,"description":`)
//line app/vmalert/provider/alert_manager_request.qtpl:22
qw422016.N().Q(alert.Annotations.Description)
//line app/vmalert/provider/alert_manager_request.qtpl:22
qw422016.N().S(`}}`)
//line app/vmalert/provider/alert_manager_request.qtpl:25
}
//line app/vmalert/provider/alert_manager_request.qtpl:25
func writeamRequest(qq422016 qtio422016.Writer, alert *config.Alert, generatorURL string) {
//line app/vmalert/provider/alert_manager_request.qtpl:25
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/provider/alert_manager_request.qtpl:25
streamamRequest(qw422016, alert, generatorURL)
//line app/vmalert/provider/alert_manager_request.qtpl:25
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/provider/alert_manager_request.qtpl:25
}
//line app/vmalert/provider/alert_manager_request.qtpl:25
func amRequest(alert *config.Alert, generatorURL string) string {
//line app/vmalert/provider/alert_manager_request.qtpl:25
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/provider/alert_manager_request.qtpl:25
writeamRequest(qb422016, alert, generatorURL)
//line app/vmalert/provider/alert_manager_request.qtpl:25
qs422016 := string(qb422016.B)
//line app/vmalert/provider/alert_manager_request.qtpl:25
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/provider/alert_manager_request.qtpl:25
return qs422016
//line app/vmalert/provider/alert_manager_request.qtpl:25
}

View File

@@ -0,0 +1,66 @@
package provider
import (
"bytes"
"fmt"
"io"
"net/http"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
const alertsPath = "/api/v2/alerts"
var pool = sync.Pool{New: func() interface{} {
return &bytes.Buffer{}
}}
// AlertManager represents integration provider with Prometheus alert manager
type AlertManager struct {
alertURL string
argFunc AlertURLGenerator
client *http.Client
}
// AlertURLGenerator returns URL to single alert by given name
type AlertURLGenerator func(name string) string
// NewAlertManager is a constructor for AlertManager
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, c *http.Client) *AlertManager {
return &AlertManager{
alertURL: strings.TrimSuffix(alertManagerURL, "/") + alertsPath,
argFunc: fn,
client: c,
}
}
const (
jsonArrayOpen byte = 91 // [
jsonArrayClose byte = 93 // ]
)
// Send an alert or resolve message
func (am *AlertManager) Send(alert *config.Alert) error {
b := pool.Get().(*bytes.Buffer)
b.Reset()
defer pool.Put(b)
b.WriteByte(jsonArrayOpen)
writeamRequest(b, alert, am.argFunc(alert.Name))
b.WriteByte(jsonArrayClose)
resp, err := am.client.Post(am.alertURL, "application/json", b)
if err != nil {
return err
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
b.Reset()
if _, err := io.Copy(b, resp.Body); err != nil {
logger.Errorf("unable to copy error response body to buffer %s", err)
}
return fmt.Errorf("invalid response from alertmanager %s", b)
}
return nil
}

View File

@@ -0,0 +1,80 @@
package provider
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
)
func TestAlertManager_Send(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
t.Errorf("should not be called")
})
c := -1
mux.HandleFunc(alertsPath, func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Errorf("expected POST method got %s", r.Method)
}
switch c {
case 0:
conn, _, _ := w.(http.Hijacker).Hijack()
_ = conn.Close()
case 1:
w.WriteHeader(500)
case 2:
var a []struct {
Labels map[string]string `json:"labels"`
StartsAt time.Time `json:"startsAt"`
EndAt time.Time `json:"endsAt"`
Annotations map[string]string `json:"annotations"`
GeneratorURL string `json:"generatorURL"`
}
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
t.Errorf("can not unmarshal data into alert %s", err)
t.FailNow()
}
if len(a) != 1 {
t.Errorf("expected 1 alert in array got %d", len(a))
}
if a[0].GeneratorURL != "alert0" {
t.Errorf("exptected alert0 as generatorURL got %s", a[0].GeneratorURL)
}
if a[0].Labels["alertname"] != "alert0" {
t.Errorf("exptected alert0 as alert name got %s", a[0].Labels["alertname"])
}
if a[0].StartsAt.IsZero() {
t.Errorf("exptected non-zero start time")
}
if a[0].EndAt.IsZero() {
t.Errorf("exptected non-zero end time")
}
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
am := NewAlertManager(srv.URL, func(name string) string {
return name
}, srv.Client())
if err := am.Send(&config.Alert{}); err == nil {
t.Error("expected connection error got nil")
}
if err := am.Send(&config.Alert{}); err == nil {
t.Error("expected wrong http code error got nil")
}
if err := am.Send(&config.Alert{
Name: "alert0",
Start: time.Now().UTC(),
End: time.Now().UTC(),
}); err != nil {
t.Errorf("unexpected error %s", err)
}
if c != 2 {
t.Errorf("expected 2 calls(count from zero) to server got %d", c)
}
}

View File

@@ -0,0 +1,8 @@
package provider
import "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
// AlertProvider is common interface for alert manager provider
type AlertProvider interface {
Send(rule config.Alert) error
}

BIN
app/vmalert/vmalert.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -25,7 +26,7 @@ var (
func main() {
flag.Usage = usage
flag.Parse()
envflag.Parse()
buildinfo.Init()
srcFS, err := newSrcFS()

View File

@@ -0,0 +1,36 @@
package common
import (
"runtime"
"sync"
)
// GetInsertCtx returns InsertCtx from the pool.
//
// Call PutInsertCtx for returning it to the pool.
func GetInsertCtx() *InsertCtx {
select {
case ctx := <-insertCtxPoolCh:
return ctx
default:
if v := insertCtxPool.Get(); v != nil {
return v.(*InsertCtx)
}
return &InsertCtx{}
}
}
// PutInsertCtx returns ctx to the pool.
//
// ctx cannot be used after the call.
func PutInsertCtx(ctx *InsertCtx) {
ctx.Reset(0)
select {
case insertCtxPoolCh <- ctx:
default:
insertCtxPool.Put(ctx)
}
}
var insertCtxPool sync.Pool
var insertCtxPoolCh = make(chan *InsertCtx, runtime.GOMAXPROCS(-1))

View File

@@ -1,161 +1,44 @@
package graphite
import (
"fmt"
"io"
"net"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="graphite"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="graphite"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="graphite"}`)
)
// insertHandler processes remote write for graphite plaintext protocol.
// InsertHandler processes remote write for graphite plaintext protocol.
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
func insertHandler(r io.Reader) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r)
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertHandlerInternal(r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
if err := ctx.InsertRows(); err != nil {
return err
}
}
return ctx.Error()
}
func insertRows(rows []parser.Row) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
func (ctx *pushCtx) InsertRows() error {
rows := ctx.Rows.Rows
ic := &ctx.Common
ic.Reset(len(rows))
ctx.Reset(len(rows))
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
ctx.Labels = ctx.Labels[:0]
ctx.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
ctx.AddLabel(tag.Key, tag.Value)
}
ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
}
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
return ctx.FlushBufs()
}
const flushTimeout = 3 * time.Second
func (ctx *pushCtx) Read(r io.Reader) bool {
readCalls.Inc()
if ctx.err != nil {
return false
}
if c, ok := r.(net.Conn); ok {
if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
return false
}
}
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
// Flush the read data on timeout and try reading again.
ctx.err = nil
} else {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", ctx.err)
}
return false
}
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Fill missing timestamps with the current timestamp rounded to seconds.
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps from seconds to milliseconds.
for i := range rows {
rows[i].Timestamp *= 1e3
}
return true
}
type pushCtx struct {
Rows Rows
Common common.InsertCtx
reqBuf []byte
tailBuf []byte
err error
}
func (ctx *pushCtx) Error() error {
if ctx.err == io.EOF {
return nil
}
return ctx.err
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.err = nil
}
var (
readCalls = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
)
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -2,84 +2,56 @@ package influx
import (
"flag"
"fmt"
"io"
"net/http"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for `{measurement}{separator}{field_name}` metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses `{measurement}` instead of `{measurement}{separator}{field_name}` for metic name if Influx line contains only a single field")
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for '{measurement}{separator}{field_name}' metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if Influx line contains only a single field")
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="influx"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="influx"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="influx"}`)
)
// InsertHandler processes remote write for influx line protocol.
// InsertHandlerForReader processes remote write for influx line protocol.
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
func InsertHandler(req *http.Request) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(req)
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
func InsertHandlerForReader(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, false, "", "", insertRows)
})
}
func insertHandlerInternal(req *http.Request) error {
readCalls.Inc()
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(r)
if err != nil {
return fmt.Errorf("cannot read gzipped influx line protocol data: %s", err)
}
defer common.PutGzipReader(zr)
r = zr
}
q := req.URL.Query()
tsMultiplier := int64(1e6)
switch q.Get("precision") {
case "ns":
tsMultiplier = 1e6
case "u":
tsMultiplier = 1e3
case "ms":
tsMultiplier = 1
case "s":
tsMultiplier = -1e3
case "m":
tsMultiplier = -1e3 * 60
case "h":
tsMultiplier = -1e3 * 3600
}
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r, tsMultiplier) {
if err := ctx.InsertRows(db); err != nil {
return err
}
}
return ctx.Error()
// InsertHandlerForHTTP processes remote write for influx line protocol.
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
func InsertHandlerForHTTP(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
return parser.ParseStream(req.Body, isGzipped, precision, db, insertRows)
})
}
func (ctx *pushCtx) InsertRows(db string) error {
rows := ctx.Rows.Rows
func insertRows(db string, rows []parser.Row) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
rowsLen := 0
for i := range rows {
rowsLen += len(rows[i].Fields)
@@ -125,80 +97,16 @@ func (ctx *pushCtx) InsertRows(db string) error {
return ic.FlushBufs()
}
func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
if ctx.err != nil {
return false
}
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", ctx.err)
}
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Adjust timestamps according to tsMultiplier
currentTs := time.Now().UnixNano() / 1e6
if tsMultiplier >= 1 {
for i := range ctx.Rows.Rows {
row := &ctx.Rows.Rows[i]
if row.Timestamp == 0 {
row.Timestamp = currentTs
} else {
row.Timestamp /= tsMultiplier
}
}
} else if tsMultiplier < 0 {
tsMultiplier = -tsMultiplier
currentTs -= currentTs % tsMultiplier
for i := range ctx.Rows.Rows {
row := &ctx.Rows.Rows[i]
if row.Timestamp == 0 {
row.Timestamp = currentTs
} else {
row.Timestamp *= tsMultiplier
}
}
}
return true
}
var (
readCalls = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
)
type pushCtx struct {
Rows Rows
Common common.InsertCtx
reqBuf []byte
tailBuf []byte
Common common.InsertCtx
metricNameBuf []byte
metricGroupBuf []byte
err error
}
func (ctx *pushCtx) Error() error {
if ctx.err == io.EOF {
return nil
}
return ctx.err
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
ctx.err = nil
}
func getPushCtx() *pushCtx {

View File

@@ -6,52 +6,67 @@ import (
"net/http"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prompush"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite"
influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
opentsdbhttpserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for Influx line protocol data. Usually :8189 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superflouos labels are dropped")
)
var (
graphiteServer *graphite.Server
opentsdbServer *opentsdb.Server
opentsdbhttpServer *opentsdbhttp.Server
influxServer *influxserver.Server
graphiteServer *graphiteserver.Server
opentsdbServer *opentsdbserver.Server
opentsdbhttpServer *opentsdbhttpserver.Server
)
// Init initializes vminsert.
func Init() {
storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
concurrencylimiter.Init()
writeconcurrencylimiter.Init()
if len(*influxListenAddr) > 0 {
influxServer = influxserver.MustStart(*influxListenAddr, influx.InsertHandlerForReader)
}
if len(*graphiteListenAddr) > 0 {
graphiteServer = graphite.MustStart(*graphiteListenAddr)
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
}
if len(*opentsdbListenAddr) > 0 {
opentsdbServer = opentsdb.MustStart(*opentsdbListenAddr, int64(*maxInsertRequestSize))
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, opentsdbhttp.InsertHandler)
}
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttpServer = opentsdbhttp.MustStart(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, opentsdbhttp.InsertHandler)
}
promscrape.Init(prompush.Push)
}
// Stop stops vminsert.
func Stop() {
promscrape.Stop()
if len(*influxListenAddr) > 0 {
influxServer.MustStop()
}
if len(*graphiteListenAddr) > 0 {
graphiteServer.MustStop()
}
@@ -69,7 +84,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/api/v1/write":
prometheusWriteRequests.Inc()
if err := prometheus.InsertHandler(r, int64(*maxInsertRequestSize)); err != nil {
if err := promremotewrite.InsertHandler(r); err != nil {
prometheusWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -87,7 +102,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/write", "/api/v2/write":
influxWriteRequests.Inc()
if err := influx.InsertHandler(r); err != nil {
if err := influx.InsertHandlerForHTTP(r); err != nil {
influxWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -100,6 +115,11 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
influxQueryRequests.Inc()
fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
return true
case "/targets":
promscrapeTargetsRequests.Inc()
w.Header().Set("Content-Type", "text/plain")
promscrape.WriteHumanReadableTargetsStatus(w)
return true
default:
// This is not our link
return false
@@ -117,4 +137,6 @@ var (
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/targets"}`)
)

View File

@@ -1,160 +1,44 @@
package opentsdb
import (
"fmt"
"io"
"net"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="opentsdb"}`)
)
// insertHandler processes remote write for OpenTSDB put protocol.
// InsertHandler processes remote write for OpenTSDB put protocol.
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
func insertHandler(r io.Reader) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r)
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
}
func insertHandlerInternal(r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
if err := ctx.InsertRows(); err != nil {
return err
}
}
return ctx.Error()
}
func insertRows(rows []parser.Row) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
func (ctx *pushCtx) InsertRows() error {
rows := ctx.Rows.Rows
ic := &ctx.Common
ic.Reset(len(rows))
ctx.Reset(len(rows))
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
ctx.Labels = ctx.Labels[:0]
ctx.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
ctx.AddLabel(tag.Key, tag.Value)
}
ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
}
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
return ctx.FlushBufs()
}
const flushTimeout = 3 * time.Second
func (ctx *pushCtx) Read(r io.Reader) bool {
readCalls.Inc()
if ctx.err != nil {
return false
}
if c, ok := r.(net.Conn); ok {
if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
return false
}
}
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
// Flush the read data on timeout and try reading again.
ctx.err = nil
} else {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", ctx.err)
}
return false
}
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Fill in missing timestamps
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps from seconds to milliseconds
for i := range rows {
rows[i].Timestamp *= 1e3
}
return true
}
type pushCtx struct {
Rows Rows
Common common.InsertCtx
reqBuf []byte
tailBuf []byte
err error
}
func (ctx *pushCtx) Error() error {
if ctx.err == io.EOF {
return nil
}
return ctx.err
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.err = nil
}
var (
readCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
)
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -2,149 +2,49 @@ package opentsdbhttp
import (
"fmt"
"io"
"net/http"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb-http"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)
readCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
unmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdbhttp"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="opentsdbhttp"}`)
)
// insertHandler processes HTTP OpenTSDB put requests.
// InsertHandler processes HTTP OpenTSDB put requests.
// See http://opentsdb.net/docs/build/html/api_http/put.html
func insertHandler(req *http.Request, maxSize int64) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(req, maxSize)
})
func InsertHandler(req *http.Request) error {
path := req.URL.Path
switch path {
case "/api/put":
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
default:
return fmt.Errorf("unexpected path requested on HTTP OpenTSDB server: %q", path)
}
}
func insertHandlerInternal(req *http.Request, maxSize int64) error {
readCalls.Inc()
func insertRows(rows []parser.Row) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(r)
if err != nil {
readErrors.Inc()
return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
}
defer common.PutGzipReader(zr)
r = zr
}
ctx := getPushCtx()
defer putPushCtx(ctx)
// Read the request in ctx.reqBuf
lr := io.LimitReader(r, maxSize+1)
reqLen, err := ctx.reqBuf.ReadFrom(lr)
if err != nil {
readErrors.Inc()
return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
}
if reqLen > maxSize {
readErrors.Inc()
return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
}
// Unmarshal the request to ctx.Rows
p := parserPool.Get()
defer parserPool.Put(p)
v, err := p.ParseBytes(ctx.reqBuf.B)
if err != nil {
unmarshalErrors.Inc()
return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
}
ctx.Rows.Unmarshal(v)
// Fill in missing timestamps
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
ctx.Reset(len(rows))
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps in seconds to milliseconds if needed.
// See http://opentsdb.net/docs/javadoc/net/opentsdb/core/Const.html#SECOND_MASK
for i := range rows {
r := &rows[i]
if r.Timestamp&secondMask == 0 {
r.Timestamp *= 1e3
}
}
// Insert ctx.Rows to db.
ic := &ctx.Common
ic.Reset(len(rows))
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
ctx.Labels = ctx.Labels[:0]
ctx.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
ctx.AddLabel(tag.Key, tag.Value)
}
ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value)
}
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
return ctx.FlushBufs()
}
const secondMask int64 = 0x7FFFFFFF00000000
var parserPool fastjson.ParserPool
type pushCtx struct {
Rows Rows
Common common.InsertCtx
reqBuf bytesutil.ByteBuffer
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf.Reset()
}
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -1,112 +0,0 @@
package prometheus
import (
"fmt"
"net/http"
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="prometheus"}`)
)
// InsertHandler processes remote write for prometheus.
func InsertHandler(r *http.Request, maxSize int64) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r, maxSize)
})
}
func insertHandlerInternal(r *http.Request, maxSize int64) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
if err := ctx.Read(r, maxSize); err != nil {
return err
}
timeseries := ctx.req.Timeseries
rowsLen := 0
for i := range timeseries {
rowsLen += len(timeseries[i].Samples)
}
ic := &ctx.Common
ic.Reset(rowsLen)
rowsTotal := 0
for i := range timeseries {
ts := &timeseries[i]
var metricNameRaw []byte
for i := range ts.Samples {
r := &ts.Samples[i]
metricNameRaw = ic.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
}
rowsTotal += len(ts.Samples)
}
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ic.FlushBufs()
}
type pushCtx struct {
Common common.InsertCtx
req prompb.WriteRequest
reqBuf []byte
}
func (ctx *pushCtx) reset() {
ctx.Common.Reset(0)
ctx.req.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
}
func (ctx *pushCtx) Read(r *http.Request, maxSize int64) error {
prometheusReadCalls.Inc()
var err error
ctx.reqBuf, err = prompb.ReadSnappy(ctx.reqBuf[:0], r.Body, maxSize)
if err != nil {
prometheusReadErrors.Inc()
return fmt.Errorf("cannot read prompb.WriteRequest: %s", err)
}
if err = ctx.req.Unmarshal(ctx.reqBuf); err != nil {
prometheusUnmarshalErrors.Inc()
return fmt.Errorf("cannot unmarshal prompb.WriteRequest with size %d bytes: %s", len(ctx.reqBuf), err)
}
return nil
}
var (
prometheusReadCalls = metrics.NewCounter(`vm_read_calls_total{name="prometheus"}`)
prometheusReadErrors = metrics.NewCounter(`vm_read_errors_total{name="prometheus"}`)
prometheusUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="prometheus"}`)
)
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -0,0 +1,113 @@
package prompush
import (
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promscrape"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promscrape"}`)
)
const maxRowsPerBlock = 10000
// Push pushes wr to storage.
func Push(wr *prompbmarshal.WriteRequest) {
ctx := getPushCtx()
defer putPushCtx(ctx)
tss := wr.Timeseries
for len(tss) > 0 {
// Process big tss in smaller blocks in order to reduce maxmimum memory usage
tssBlock := tss
if len(tssBlock) > maxRowsPerBlock {
tssBlock = tss[:maxRowsPerBlock]
tss = tss[maxRowsPerBlock:]
} else {
tss = nil
}
ctx.push(tssBlock)
}
}
func (ctx *pushCtx) push(tss []prompbmarshal.TimeSeries) {
rowsLen := 0
for i := range tss {
rowsLen += len(tss[i].Samples)
}
ic := &ctx.Common
ic.Reset(rowsLen)
rowsTotal := 0
labels := ctx.labels[:0]
for i := range tss {
ts := &tss[i]
labels = labels[:0]
for j := range ts.Labels {
label := &ts.Labels[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeBytes(label.Name),
Value: bytesutil.ToUnsafeBytes(label.Value),
})
}
var metricNameRaw []byte
for i := range ts.Samples {
r := &ts.Samples[i]
metricNameRaw = ic.WriteDataPointExt(metricNameRaw, labels, r.Timestamp, r.Value)
}
rowsTotal += len(ts.Samples)
}
ctx.labels = labels
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
if err := ic.FlushBufs(); err != nil {
logger.Errorf("cannot flush promscrape data to storage: %s", err)
}
}
type pushCtx struct {
Common common.InsertCtx
labels []prompb.Label
}
func (ctx *pushCtx) reset() {
ctx.Common.Reset(0)
for i := range ctx.labels {
label := &ctx.labels[i]
label.Name = nil
label.Value = nil
}
ctx.labels = ctx.labels[:0]
}
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -0,0 +1,47 @@
package promremotewrite
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promremotewrite"}`)
)
// InsertHandler processes remote write for prometheus.
func InsertHandler(req *http.Request) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
}
func insertRows(timeseries []prompb.TimeSeries) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
rowsLen := 0
for i := range timeseries {
rowsLen += len(timeseries[i].Samples)
}
ctx.Reset(rowsLen)
rowsTotal := 0
for i := range timeseries {
ts := &timeseries[i]
var metricNameRaw []byte
for i := range ts.Samples {
r := &ts.Samples[i]
metricNameRaw = ctx.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
}
rowsTotal += len(ts.Samples)
}
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ctx.FlushBufs()
}

View File

@@ -1,61 +1,35 @@
package vmimport
import (
"flag"
"fmt"
"io"
"net/http"
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var maxLineLen = flag.Int("import.maxLineLen", 100*1024*1024, "The maximum length in bytes of a single line accepted by `/api/v1/import`")
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="vmimport"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="vmimport"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="vmimport"}`)
)
// InsertHandler processes `/api/v1/import` request.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
func InsertHandler(req *http.Request) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(req)
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, insertRows)
})
}
func insertHandlerInternal(req *http.Request) error {
readCalls.Inc()
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(r)
if err != nil {
return fmt.Errorf("cannot read gzipped vmimport data: %s", err)
}
defer common.PutGzipReader(zr)
r = zr
}
func insertRows(rows []parser.Row) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
if err := ctx.InsertRows(); err != nil {
return err
}
}
return ctx.Error()
}
func (ctx *pushCtx) InsertRows() error {
rows := ctx.Rows.Rows
rowsLen := 0
for i := range rows {
rowsLen += len(rows[i].Values)
@@ -85,54 +59,14 @@ func (ctx *pushCtx) InsertRows() error {
return ic.FlushBufs()
}
func (ctx *pushCtx) Read(r io.Reader) bool {
if ctx.err != nil {
return false
}
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlockExt(r, ctx.reqBuf, ctx.tailBuf, *maxLineLen)
if ctx.err != nil {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read vmimport data: %s", ctx.err)
}
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
return true
}
var (
readCalls = metrics.NewCounter(`vm_read_calls_total{name="vmimport"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="vmimport"}`)
)
type pushCtx struct {
Rows Rows
Common common.InsertCtx
reqBuf []byte
tailBuf []byte
Common common.InsertCtx
metricNameBuf []byte
err error
}
func (ctx *pushCtx) Error() error {
if ctx.err == io.EOF {
return nil
}
return ctx.err
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.err = nil
}
func getPushCtx() *pushCtx {

View File

@@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -16,13 +17,14 @@ var (
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup")
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file")
)
func main() {
flag.Usage = usage
flag.Parse()
envflag.Parse()
buildinfo.Init()
srcFS, err := newSrcFS()
@@ -34,9 +36,10 @@ func main() {
logger.Fatalf("%s", err)
}
a := &actions.Restore{
Concurrency: *concurrency,
Src: srcFS,
Dst: dstFS,
Concurrency: *concurrency,
Src: srcFS,
Dst: dstFS,
SkipBackupCompleteCheck: *skipBackupCompleteCheck,
}
if err := a.Run(); err != nil {
logger.Fatalf("cannot restore from backup: %s", err)

View File

@@ -21,10 +21,26 @@ import (
var (
deleteAuthKey = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
"It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
resetCacheAuthKey = flag.String("search.resetCacheAuthKey", "", "Optional authKey for resetting rollup cache via /internal/resetCache call")
)
func getDefaultMaxConcurrentRequests() int {
n := runtime.GOMAXPROCS(-1)
if n <= 4 {
n *= 2
}
if n > 16 {
// A single request can saturate all the CPU cores, so there is no sense
// in allowing higher number of concurrent requests - they will just contend
// for unavailable CPU time.
n = 16
}
return n
}
// Init initializes vmselect
func Init() {
tmpDirPath := *vmstorage.DataPath + "/tmp"
@@ -56,6 +72,7 @@ var (
// RequestHandler handles remote read API requests for Prometheus
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
startTime := time.Now()
// Limit the number of concurrent queries.
select {
case concurrencyCh <- struct{}{}:
@@ -72,7 +89,9 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
timerpool.Put(t)
concurrencyLimitTimeout.Inc()
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
Err: fmt.Errorf("cannot handle more than %d concurrent search requests during %s; possible solutions: "+
"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity",
*maxConcurrentRequests, *maxQueueDuration),
StatusCode: http.StatusServiceUnavailable,
}
httpserver.Errorf(w, "%s", err)
@@ -81,13 +100,22 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
path := strings.Replace(r.URL.Path, "//", "/", -1)
if path == "/internal/resetRollupResultCache" {
if len(*resetCacheAuthKey) > 0 && r.FormValue("authKey") != *resetCacheAuthKey {
sendPrometheusError(w, r, fmt.Errorf("invalid authKey=%q for %q", r.FormValue("authKey"), path))
return true
}
promql.ResetRollupResultCache()
return true
}
if strings.HasPrefix(path, "/api/v1/label/") {
s := r.URL.Path[len("/api/v1/label/"):]
if strings.HasSuffix(s, "/values") {
labelValuesRequests.Inc()
labelName := s[:len(s)-len("/values")]
httpserver.EnableCORS(w, r)
if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
if err := prometheus.LabelValuesHandler(startTime, labelName, w, r); err != nil {
labelValuesErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -100,7 +128,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/query":
queryRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryHandler(w, r); err != nil {
if err := prometheus.QueryHandler(startTime, w, r); err != nil {
queryErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -109,7 +137,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/query_range":
queryRangeRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryRangeHandler(w, r); err != nil {
if err := prometheus.QueryRangeHandler(startTime, w, r); err != nil {
queryRangeErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -118,7 +146,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/series":
seriesRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.SeriesHandler(w, r); err != nil {
if err := prometheus.SeriesHandler(startTime, w, r); err != nil {
seriesErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -127,7 +155,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/series/count":
seriesCountRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.SeriesCountHandler(w, r); err != nil {
if err := prometheus.SeriesCountHandler(startTime, w, r); err != nil {
seriesCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -136,7 +164,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/labels":
labelsRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.LabelsHandler(w, r); err != nil {
if err := prometheus.LabelsHandler(startTime, w, r); err != nil {
labelsErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -145,7 +173,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/api/v1/labels/count":
labelsCountRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.LabelsCountHandler(w, r); err != nil {
if err := prometheus.LabelsCountHandler(startTime, w, r); err != nil {
labelsCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -153,7 +181,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/api/v1/export":
exportRequests.Inc()
if err := prometheus.ExportHandler(w, r); err != nil {
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
exportErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -161,7 +189,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/federate":
federateRequests.Inc()
if err := prometheus.FederateHandler(w, r); err != nil {
if err := prometheus.FederateHandler(startTime, w, r); err != nil {
federateErrors.Inc()
httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
return true
@@ -179,6 +207,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
return true
case "/api/v1/metadata":
// Return dumb placeholder
metadataRequests.Inc()
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":{}}`)
return true
case "/api/v1/admin/tsdb/delete_series":
deleteRequests.Inc()
authKey := r.FormValue("authKey")
@@ -186,7 +220,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
return true
}
if err := prometheus.DeleteHandler(r); err != nil {
if err := prometheus.DeleteHandler(startTime, r); err != nil {
deleteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -199,7 +233,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
logger.Errorf("error in %q: %s", r.URL.Path, err)
logger.Errorf("error in %q: %s", r.RequestURI, err)
w.Header().Set("Content-Type", "application/json")
statusCode := http.StatusUnprocessableEntity
@@ -241,6 +275,7 @@ var (
federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)
)

View File

@@ -103,7 +103,7 @@ func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
rowsProcessed := 0
for pts := range workCh {
if time.Until(rss.deadline.Deadline) < 0 {
err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.Timeout)
err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
break
}
if err = pts.Unpack(rss.tbf, rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
@@ -266,7 +266,7 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap) {
dst.Timestamps = append(dst.Timestamps, top.Timestamps[top.NextIdx:]...)
dst.Values = append(dst.Values, top.Values[top.NextIdx:]...)
putSortBlock(top)
return
break
}
sbNext := sbh[0]
tsNext := sbNext.Timestamps[sbNext.NextIdx]
@@ -287,8 +287,16 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap) {
putSortBlock(top)
}
}
timestamps, values := storage.DeduplicateSamples(dst.Timestamps, dst.Values)
dedups := len(dst.Timestamps) - len(timestamps)
dedupsDuringSelect.Add(dedups)
dst.Timestamps = timestamps
dst.Values = values
}
var dedupsDuringSelect = metrics.NewCounter(`vm_deduplicated_samples_total{type="select"}`)
type sortBlock struct {
// b is used as a temporary storage for unpacked rows before they
// go to Timestamps and Values.
@@ -499,7 +507,7 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
}
if time.Until(deadline.Deadline) < 0 {
putTmpBlocksFile(tbf)
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.Timeout)
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
}
metricName := sr.MetricBlock.MetricName
m[string(metricName)] = append(m[string(metricName)], addr)
@@ -575,13 +583,24 @@ func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error)
// Deadline contains deadline with the corresponding timeout for pretty error messages.
type Deadline struct {
Deadline time.Time
Timeout time.Duration
timeout time.Duration
flagHint string
}
// NewDeadline returns deadline for the given timeout.
func NewDeadline(timeout time.Duration) Deadline {
//
// flagHint must contain a hit for command-line flag, which could be used
// in order to increase timeout.
func NewDeadline(timeout time.Duration, flagHint string) Deadline {
return Deadline{
Deadline: time.Now().Add(timeout),
Timeout: timeout,
timeout: timeout,
flagHint: flagHint,
}
}
// String returns human-readable string representation for d.
func (d *Deadline) String() string {
return fmt.Sprintf("%.3f seconds; the timeout can be adjusted with `%s` command-line flag", d.timeout.Seconds(), d.flagHint)
}

View File

@@ -36,6 +36,9 @@ func maxInmemoryTmpBlocksFile() int {
if maxLen < 64*1024 {
return 64 * 1024
}
if maxLen > 4*1024*1024 {
return 4 * 1024 * 1024
}
return maxLen
}
@@ -47,6 +50,7 @@ type tmpBlocksFile struct {
buf []byte
f *os.File
r *fs.ReaderAt
offset uint64
}
@@ -65,6 +69,7 @@ func putTmpBlocksFile(tbf *tmpBlocksFile) {
tbf.MustClose()
tbf.buf = tbf.buf[:0]
tbf.f = nil
tbf.r = nil
tbf.offset = 0
tmpBlocksFilePool.Put(tbf)
}
@@ -118,17 +123,20 @@ func (tbf *tmpBlocksFile) Finalize() error {
if tbf.f == nil {
return nil
}
fname := tbf.f.Name()
if _, err := tbf.f.Write(tbf.buf); err != nil {
return fmt.Errorf("cannot flush the remaining %d bytes to tmpBlocksFile: %s", len(tbf.buf), err)
return fmt.Errorf("cannot write the remaining %d bytes to %q: %s", len(tbf.buf), fname, err)
}
tbf.buf = tbf.buf[:0]
if _, err := tbf.f.Seek(0, 0); err != nil {
logger.Panicf("FATAL: cannot seek to the start of file: %s", err)
r, err := fs.OpenReaderAt(fname)
if err != nil {
logger.Panicf("FATAL: cannot open %q: %s", fname, err)
}
// Hint the OS that the file is read almost sequentiallly.
// This should reduce the number of disk seeks, which is important
// for HDDs.
fs.MustFadviseSequentialRead(tbf.f, true)
r.MustFadviseSequentialRead(true)
tbf.r = r
return nil
}
@@ -140,13 +148,7 @@ func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr)
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
bb.B = bytesutil.Resize(bb.B, addr.size)
n, err := tbf.f.ReadAt(bb.B, int64(addr.offset))
if err != nil {
logger.Panicf("FATAL: cannot read from %q at %s: %s", tbf.f.Name(), addr, err)
}
if n != len(bb.B) {
logger.Panicf("FATAL: too short number of bytes read at %s; got %d; want %d", addr, n, len(bb.B))
}
tbf.r.MustReadAt(bb.B, int64(addr.offset))
buf = bb.B
}
tail, err := storage.UnmarshalBlock(dst, buf)
@@ -164,6 +166,10 @@ func (tbf *tmpBlocksFile) MustClose() {
if tbf.f == nil {
return
}
if tbf.r != nil {
// tbf.r could be nil if Finalize wasn't called.
tbf.r.MustClose()
}
fname := tbf.f.Name()
// Remove the file at first, then close it.

View File

@@ -24,19 +24,18 @@ import (
var (
latencyOffset = flag.Duration("search.latencyOffset", time.Second*30, "The time when data points become visible in query results after the colection. "+
"Too small value can result in incomplete last points for query results")
maxExportDuration = flag.Duration("search.maxExportDuration", 10*time.Minute, "The maximum duration for `/api/v1/export` call")
maxExportDuration = flag.Duration("search.maxExportDuration", time.Hour*24*30, "The maximum duration for /api/v1/export call")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for search query execution")
maxQueryLen = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
maxLookback = flag.Duration("search.maxLookback", 0, "Synonim to `-search.lookback-delta` from Prometheus. "+
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via `max_lookback` arg")
maxLookback = flag.Duration("search.maxLookback", 0, "Synonim to -search.lookback-delta from Prometheus. "+
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg")
)
// Default step used if not set.
const defaultStep = 5 * 60 * 1000
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
func FederateHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
ct := currentTime()
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
@@ -107,8 +106,7 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)
// ExportHandler exports data in raw format from /api/v1/export.
func ExportHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
ct := currentTime()
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
@@ -136,7 +134,7 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
end = start + defaultStep
}
if err := exportHandler(w, matches, start, end, format, deadline); err != nil {
return err
return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %s", matches, start, end, err)
}
exportDuration.UpdateDuration(startTime)
return nil
@@ -200,8 +198,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
// DeleteHandler processes /api/v1/admin/tsdb/delete_series prometheus API request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
func DeleteHandler(r *http.Request) error {
startTime := time.Now()
func DeleteHandler(startTime time.Time, r *http.Request) error {
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
}
@@ -235,8 +232,7 @@ var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
// LabelValuesHandler processes /api/v1/label/<labelName>/values request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func LabelValuesHandler(startTime time.Time, labelName string, w http.ResponseWriter, r *http.Request) error {
deadline := getDeadlineForQuery(r)
if err := r.ParseForm(); err != nil {
@@ -287,12 +283,18 @@ func labelValuesWithMatches(labelName string, matches []string, start, end int64
if err != nil {
return nil, err
}
for i, tfs := range tagFilterss {
// Add `labelName!=''` tag filter in order to filter out series without the labelName.
tagFilterss[i] = append(tfs, storage.TagFilter{
Key: []byte(labelName),
IsNegative: true,
})
// Add `labelName!=''` tag filter in order to filter out series without the labelName.
// There is no need in adding `__name__!=''` filter, since all the time series should
// already have non-empty name.
if labelName != "__name__" {
key := []byte(labelName)
for i, tfs := range tagFilterss {
tagFilterss[i] = append(tfs, storage.TagFilter{
Key: key,
IsNegative: true,
})
}
}
if start >= end {
end = start + defaultStep
@@ -333,8 +335,7 @@ func labelValuesWithMatches(labelName string, matches []string, start, end int64
var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/label/{}/values"}`)
// LabelsCountHandler processes /api/v1/labels/count request.
func LabelsCountHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func LabelsCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := getDeadlineForQuery(r)
labelEntries, err := netstorage.GetLabelEntries(deadline)
if err != nil {
@@ -352,8 +353,7 @@ var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// LabelsHandler processes /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func LabelsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := getDeadlineForQuery(r)
if err := r.ParseForm(); err != nil {
@@ -442,8 +442,7 @@ func labelsWithMatches(matches []string, start, end int64, deadline netstorage.D
var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)
// SeriesCountHandler processes /api/v1/series/count request.
func SeriesCountHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func SeriesCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := getDeadlineForQuery(r)
n, err := netstorage.GetSeriesCount(deadline)
if err != nil {
@@ -460,8 +459,7 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// SeriesHandler processes /api/v1/series request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func SeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
ct := currentTime()
if err := r.ParseForm(); err != nil {
@@ -536,8 +534,7 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
// QueryHandler processes /api/v1/query request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
func QueryHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func QueryHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
ct := currentTime()
query := r.FormValue("query")
@@ -560,7 +557,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
}
if len(query) > *maxQueryLen {
return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
}
if !getBool(r, "nocache") && ct-start < queryOffset {
// Adjust start time only if `nocache` arg isn't set.
@@ -580,7 +577,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
end := start
start = end - window
if err := exportHandler(w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
return err
return fmt.Errorf("error when exporting data for query=%q on the time range (start=%d, end=%d): %s", childQuery, start, end, err)
}
queryDuration.UpdateDuration(startTime)
return nil
@@ -605,7 +602,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
end := start
start = end - window
if err := queryRangeHandler(w, childQuery, start, end, step, r, ct); err != nil {
return err
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", childQuery, start, end, step, err)
}
queryDuration.UpdateDuration(startTime)
return nil
@@ -620,7 +617,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
}
result, err := promql.Exec(&ec, query, true)
if err != nil {
return fmt.Errorf("cannot execute %q: %s", query, err)
return fmt.Errorf("error when executing query=%q for (time=%d, step=%d): %s", query, start, step, err)
}
w.Header().Set("Content-Type", "application/json")
@@ -648,8 +645,7 @@ func parsePositiveDuration(s string, step int64) (int64, error) {
// QueryRangeHandler processes /api/v1/query_range request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
func QueryRangeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
ct := currentTime()
query := r.FormValue("query")
@@ -669,7 +665,7 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
return err
}
if err := queryRangeHandler(w, query, start, end, step, r, ct); err != nil {
return err
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", query, start, end, step, err)
}
queryRangeDuration.UpdateDuration(startTime)
return nil
@@ -685,7 +681,7 @@ func queryRangeHandler(w http.ResponseWriter, query string, start, end, step int
// Validate input args.
if len(query) > *maxQueryLen {
return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
}
if start > end {
end = start + defaultStep
@@ -707,7 +703,7 @@ func queryRangeHandler(w http.ResponseWriter, query string, start, end, step int
}
result, err := promql.Exec(&ec, query, false)
if err != nil {
return fmt.Errorf("cannot execute %q: %s", query, err)
return fmt.Errorf("cannot execute query: %s", err)
}
queryOffset := getLatencyOffsetMilliseconds()
if ct-end < queryOffset {
@@ -870,15 +866,15 @@ func getMaxLookback(r *http.Request) (int64, error) {
func getDeadlineForQuery(r *http.Request) netstorage.Deadline {
dMax := int64(maxQueryDuration.Seconds() * 1e3)
return getDeadlineWithMaxDuration(r, dMax)
return getDeadlineWithMaxDuration(r, dMax, "-search.maxQueryDuration")
}
func getDeadlineForExport(r *http.Request) netstorage.Deadline {
dMax := int64(maxExportDuration.Seconds() * 1e3)
return getDeadlineWithMaxDuration(r, dMax)
return getDeadlineWithMaxDuration(r, dMax, "-search.maxExportDuration")
}
func getDeadlineWithMaxDuration(r *http.Request, dMax int64) netstorage.Deadline {
func getDeadlineWithMaxDuration(r *http.Request, dMax int64, flagHint string) netstorage.Deadline {
d, err := getDuration(r, "timeout", 0)
if err != nil {
d = 0
@@ -887,7 +883,7 @@ func getDeadlineWithMaxDuration(r *http.Request, dMax int64) netstorage.Deadline
d = dMax
}
timeout := time.Duration(d) * time.Millisecond
return netstorage.NewDeadline(timeout)
return netstorage.NewDeadline(timeout, flagHint)
}
func getBool(r *http.Request, argKey string) bool {

View File

@@ -32,7 +32,7 @@ var binaryOpFuncs = map[string]binaryOpFunc{
"or": binaryOpOr,
"unless": binaryOpUnless,
// New op
// New ops
"if": newBinaryOpArithFunc(binaryop.If),
"ifnot": newBinaryOpArithFunc(binaryop.Ifnot),
"default": newBinaryOpArithFunc(binaryop.Default),
@@ -285,10 +285,21 @@ func resetMetricGroupIfRequired(be *metricsql.BinaryOpExpr, ts *timeseries) {
func binaryOpAnd(bfa *binaryOpFuncArg) ([]*timeseries, error) {
mLeft, mRight := createTimeseriesMapByTagSet(bfa.be, bfa.left, bfa.right)
var rvs []*timeseries
for k := range mRight {
if tss := mLeft[k]; tss != nil {
rvs = append(rvs, tss...)
for k, tssRight := range mRight {
tssLeft := mLeft[k]
if tssLeft == nil {
continue
}
for i := range tssLeft[0].Values {
if !isAllNaNs(tssRight, i) {
continue
}
for _, tsLeft := range tssLeft {
tsLeft.Values[i] = nan
}
}
tssLeft = removeNaNs(tssLeft)
rvs = append(rvs, tssLeft...)
}
return rvs, nil
}
@@ -310,14 +321,35 @@ func binaryOpOr(bfa *binaryOpFuncArg) ([]*timeseries, error) {
func binaryOpUnless(bfa *binaryOpFuncArg) ([]*timeseries, error) {
mLeft, mRight := createTimeseriesMapByTagSet(bfa.be, bfa.left, bfa.right)
var rvs []*timeseries
for k, tss := range mLeft {
if mRight[k] == nil {
rvs = append(rvs, tss...)
for k, tssLeft := range mLeft {
tssRight := mRight[k]
if tssRight == nil {
rvs = append(rvs, tssLeft...)
continue
}
for i := range tssLeft[0].Values {
if isAllNaNs(tssRight, i) {
continue
}
for _, tsLeft := range tssLeft {
tsLeft.Values[i] = nan
}
}
tssLeft = removeNaNs(tssLeft)
rvs = append(rvs, tssLeft...)
}
return rvs, nil
}
func isAllNaNs(tss []*timeseries, idx int) bool {
for _, ts := range tss {
if !math.IsNaN(ts.Values[idx]) {
return false
}
}
return true
}
func createTimeseriesMapByTagSet(be *metricsql.BinaryOpExpr, left, right []*timeseries) (map[string][]*timeseries, map[string][]*timeseries) {
groupTags := be.GroupModifier.Args
groupOp := strings.ToLower(be.GroupModifier.Op)

View File

@@ -158,14 +158,14 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
re := &metricsql.RollupExpr{
Expr: me,
}
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, me.AppendString(nil), err)
}
return rv, nil
}
if re, ok := e.(*metricsql.RollupExpr); ok {
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, re.AppendString(nil), err)
}
@@ -201,7 +201,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
if err != nil {
return nil, err
}
rv, err := evalRollupFunc(ec, fe.Name, rf, re, nil)
rv, err := evalRollupFunc(ec, fe.Name, rf, e, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
}
@@ -222,7 +222,7 @@ func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
return nil, err
}
iafc := newIncrementalAggrFuncContext(ae, callbacks)
return evalRollupFunc(ec, fe.Name, rf, re, iafc)
return evalRollupFunc(ec, fe.Name, rf, e, re, iafc)
}
}
args, err := evalExprs(ec, ae.Args)
@@ -323,6 +323,10 @@ func tryGetArgRollupFuncWithMetricExpr(ae *metricsql.AggrFuncExpr) (*metricsql.F
return nil, nil
}
rollupArgIdx := getRollupArgIdx(fe.Name)
if rollupArgIdx >= len(fe.Args) {
// Incorrect number of args for rollup func.
return nil, nil
}
arg := fe.Args[rollupArgIdx]
if me, ok := arg.(*metricsql.MetricExpr); ok {
if me.IsEmpty() {
@@ -359,6 +363,9 @@ func evalExprs(ec *EvalConfig, es []metricsql.Expr) ([][]*timeseries, error) {
func evalRollupFuncArgs(ec *EvalConfig, fe *metricsql.FuncExpr) ([]interface{}, *metricsql.RollupExpr, error) {
var re *metricsql.RollupExpr
rollupArgIdx := getRollupArgIdx(fe.Name)
if len(fe.Args) <= rollupArgIdx {
return nil, nil, fmt.Errorf("expecting at least %d args to %q; got %d args; expr: %q", rollupArgIdx+1, fe.Name, len(fe.Args), fe.AppendString(nil))
}
args := make([]interface{}, len(fe.Args))
for i, arg := range fe.Args {
if i == rollupArgIdx {
@@ -403,7 +410,7 @@ func getRollupExprArg(arg metricsql.Expr) *metricsql.RollupExpr {
return &reNew
}
func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *metricsql.RollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
ecNew := ec
var offset int64
if len(re.Offset) > 0 {
@@ -412,7 +419,7 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *metricsql.Ro
if err != nil {
return nil, err
}
ecNew = newEvalConfig(ec)
ecNew = newEvalConfig(ecNew)
ecNew.Start -= offset
ecNew.End -= offset
if ecNew.MayCache {
@@ -422,15 +429,25 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *metricsql.Ro
ecNew.End = end
}
}
if name == "rollup_candlestick" {
// Automatically apply `offset -step` to `rollup_candlestick` function
// in order to obtain expected OHLC results.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309#issuecomment-582113462
step := ecNew.Step
ecNew = newEvalConfig(ecNew)
ecNew.Start += step
ecNew.End += step
offset -= step
}
var rvs []*timeseries
var err error
if me, ok := re.Expr.(*metricsql.MetricExpr); ok {
rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, iafc, re.Window)
rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, expr, me, iafc, re.Window)
} else {
if iafc != nil {
logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", name, re.AppendString(nil))
}
rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, re)
rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, expr, re)
}
if err != nil {
return nil, err
@@ -449,8 +466,8 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *metricsql.Ro
return rvs, nil
}
func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *metricsql.RollupExpr) ([]*timeseries, error) {
// Do not use rollupResultCacheV here, since it works only with metricsql.MetricExpr.
func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr) ([]*timeseries, error) {
// TODO: determine whether to use rollupResultCacheV here.
var step int64
if len(re.Step) > 0 {
var err error
@@ -481,9 +498,19 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
if err != nil {
return nil, err
}
if len(tssSQ) == 0 {
if name == "absent_over_time" {
tss := evalNumber(ec, 1)
return tss, nil
}
return nil, nil
}
sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step)
preFunc, rcs := getRollupConfigs(name, rf, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
preFunc, rcs, err := getRollupConfigs(name, rf, expr, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
if err != nil {
return nil, err
}
tss := make([]*timeseries, 0, len(tssSQ)*len(rcs))
var tssLock sync.Mutex
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
@@ -491,6 +518,13 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
values, timestamps = removeNanValues(values[:0], timestamps[:0], tsSQ.Values, tsSQ.Timestamps)
preFunc(values, timestamps)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &tsSQ.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, values, timestamps)
tssLock.Lock()
tss = tsm.AppendTimeseriesTo(tss)
tssLock.Unlock()
continue
}
var ts timeseries
doRollupForTimeseries(rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps, removeMetricGroup)
tssLock.Lock()
@@ -558,7 +592,8 @@ var (
rollupResultCacheMiss = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
)
func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
if me.IsEmpty() {
return evalNumber(ec, nan), nil
}
@@ -572,7 +607,7 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
}
// Search for partial results in cache.
tssCached, start := rollupResultCacheV.Get(name, ec, me, iafc, window)
tssCached, start := rollupResultCacheV.Get(ec, expr, window)
if start > ec.End {
// The result is fully cached.
rollupResultCacheFullHits.Inc()
@@ -584,11 +619,25 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
rollupResultCacheMiss.Inc()
}
// Obtain rollup configs before fetching data from db,
// so type errors can be caught earlier.
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
preFunc, rcs, err := getRollupConfigs(name, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
if err != nil {
return nil, err
}
// Fetch the remaining part of the result.
tfs := toTagFilters(me.LabelFilters)
minTimestamp := start - maxSilenceInterval
if window > ec.Step {
minTimestamp -= window
} else {
minTimestamp -= ec.Step
}
sq := &storage.SearchQuery{
MinTimestamp: start - window - maxSilenceInterval,
MaxTimestamp: ec.End + ec.Step,
MinTimestamp: minTimestamp,
MaxTimestamp: ec.End,
TagFilterss: [][]storage.TagFilter{tfs},
}
rss, err := netstorage.ProcessSearchQuery(sq, true, ec.Deadline)
@@ -598,14 +647,16 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
rssLen := rss.Len()
if rssLen == 0 {
rss.Cancel()
var tss []*timeseries
if name == "absent_over_time" {
tss = getAbsentTimeseries(ec, me)
}
// Add missing points until ec.End.
// Do not cache the result, since missing points
// may be backfilled in the future.
tss := mergeTimeseries(tssCached, nil, start, ec)
tss = mergeTimeseries(tssCached, tss, start, ec)
return tss, nil
}
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
preFunc, rcs := getRollupConfigs(name, rf, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
// Verify timeseries fit available memory after the rollup.
// Take into account points from tssCached.
@@ -617,8 +668,8 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
if iafc.ae.Modifier.Op != "" {
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
// since each group can have own set of time series in memory.
// Estimate the number of such groups is lower than 100 :)
timeseriesLen *= 100
// Estimate the number of such groups is lower than 1000 :)
timeseriesLen *= 1000
}
}
rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
@@ -637,16 +688,15 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
var tss []*timeseries
if iafc != nil {
tss, err = evalRollupWithIncrementalAggregate(iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
tss, err = evalRollupWithIncrementalAggregate(name, iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
} else {
tss, err = evalRollupNoIncrementalAggregate(rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
tss, err = evalRollupNoIncrementalAggregate(name, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
}
if err != nil {
return nil, err
}
tss = mergeTimeseries(tssCached, tss, start, ec)
rollupResultCacheV.Put(name, ec, me, iafc, window, tss)
rollupResultCacheV.Put(ec, expr, window, tss)
return tss, nil
}
@@ -662,13 +712,20 @@ func getRollupMemoryLimiter() *memoryLimiter {
return &rollupMemoryLimiter
}
func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
preFunc(rs.Values, rs.Timestamps)
ts := getTimeseries()
defer putTimeseries(ts)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
for _, ts := range tsm.m {
iafc.updateTimeseries(ts, workerID)
}
continue
}
ts.Reset()
doRollupForTimeseries(rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
iafc.updateTimeseries(ts, workerID)
@@ -685,13 +742,20 @@ func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *n
return tss, nil
}
func evalRollupNoIncrementalAggregate(rss *netstorage.Results, rcs []*rollupConfig,
func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
tss := make([]*timeseries, 0, rss.Len()*len(rcs))
var tssLock sync.Mutex
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
preFunc(rs.Values, rs.Timestamps)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
tssLock.Lock()
tss = tsm.AppendTimeseriesTo(tss)
tssLock.Unlock()
continue
}
var ts timeseries
doRollupForTimeseries(rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
tssLock.Lock()
@@ -719,62 +783,6 @@ func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.M
tsDst.denyReuse = true
}
func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64, lookbackDelta int64, sharedTimestamps []int64) (
func(values []float64, timestamps []int64), []*rollupConfig) {
preFunc := func(values []float64, timestamps []int64) {}
if rollupFuncsRemoveCounterResets[name] {
preFunc = func(values []float64, timestamps []int64) {
removeCounterResets(values)
}
}
newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
return &rollupConfig{
TagValue: tagValue,
Func: rf,
Start: start,
End: end,
Step: step,
Window: window,
MayAdjustWindow: rollupFuncsMayAdjustWindow[name],
LookbackDelta: lookbackDelta,
Timestamps: sharedTimestamps,
}
}
appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
dst = append(dst, newRollupConfig(rollupMin, "min"))
dst = append(dst, newRollupConfig(rollupMax, "max"))
dst = append(dst, newRollupConfig(rollupAvg, "avg"))
return dst
}
var rcs []*rollupConfig
switch name {
case "rollup":
rcs = appendRollupConfigs(rcs)
case "rollup_rate", "rollup_deriv":
preFuncPrev := preFunc
preFunc = func(values []float64, timestamps []int64) {
preFuncPrev(values, timestamps)
derivValues(values, timestamps)
}
rcs = appendRollupConfigs(rcs)
case "rollup_increase", "rollup_delta":
preFuncPrev := preFunc
preFunc = func(values []float64, timestamps []int64) {
preFuncPrev(values, timestamps)
deltaValues(values)
}
rcs = appendRollupConfigs(rcs)
case "rollup_candlestick":
rcs = append(rcs, newRollupConfig(rollupFirst, "open"))
rcs = append(rcs, newRollupConfig(rollupLast, "close"))
rcs = append(rcs, newRollupConfig(rollupMin, "low"))
rcs = append(rcs, newRollupConfig(rollupMax, "high"))
default:
rcs = append(rcs, newRollupConfig(rf, ""))
}
return preFunc, rcs
}
var bbPool bytesutil.ByteBufferPool
func evalNumber(ec *EvalConfig, n float64) []*timeseries {

View File

@@ -26,8 +26,8 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
defer func() {
d := time.Since(startTime)
if d >= *logSlowQueryDuration {
logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%s, start=%d, end=%d, step=%d, query=%q",
*logSlowQueryDuration, d, ec.Start/1000, ec.End/1000, ec.Step/1000, q)
logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%.3f seconds, start=%d, end=%d, step=%d, query=%q",
*logSlowQueryDuration, d.Seconds(), ec.Start/1000, ec.End/1000, ec.Step/1000, q)
slowQueries.Inc()
}
}()
@@ -40,25 +40,11 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
return nil, err
}
// Add an additional point to the end. This point is used
// in calculating the last value for rate, deriv, increase
// and delta funcs.
ec.End += ec.Step
rv, err := evalExpr(ec, e)
if err != nil {
return nil, err
}
// Remove the additional point at the end.
for _, ts := range rv {
ts.Values = ts.Values[:len(ts.Values)-1]
// ts.Timestamps may be shared between timeseries, so truncate it with len(ts.Values) instead of len(ts.Timestamps)-1
ts.Timestamps = ts.Timestamps[:len(ts.Values)]
}
ec.End -= ec.Step
if isFirstPointOnly {
// Remove all the points except the first one from every time series.
for _, ts := range rv {
@@ -85,7 +71,8 @@ func maySortResults(e metricsql.Expr, tss []*timeseries) bool {
return true
}
switch fe.Name {
case "sort", "sort_desc":
case "sort", "sort_desc",
"sort_by_label", "sort_by_label_desc":
return false
default:
return true

View File

@@ -21,7 +21,7 @@ func TestExecSuccess(t *testing.T) {
Start: start,
End: end,
Step: step,
Deadline: netstorage.NewDeadline(time.Minute),
Deadline: netstorage.NewDeadline(time.Minute, ""),
}
for i := 0; i < 5; i++ {
result, err := Exec(ec, q, false)
@@ -532,6 +532,12 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run("absent_over_time(time())", func(t *testing.T) {
t.Parallel()
q := `absent_over_time(time())`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run("absent(123)", func(t *testing.T) {
t.Parallel()
q := `absent(123)`
@@ -555,6 +561,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("absent_over_time(nan[200s:10s])", func(t *testing.T) {
t.Parallel()
q := `absent_over_time(nan[200s:10s])`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`absent(scalar(multi-timeseries))`, func(t *testing.T) {
t.Parallel()
q := `
@@ -571,6 +588,34 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`absent_over_time(scalar(multi-timeseries))`, func(t *testing.T) {
t.Parallel()
q := `
absent_over_time(label_set(scalar(1 or label_set(2, "xx", "foo")), "yy", "foo"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
Key: []byte("yy"),
Value: []byte("foo"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`absent(time() > 1500)`, func(t *testing.T) {
t.Parallel()
q := `
absent(time() > 1500)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run("clamp_max(time(), 1400)", func(t *testing.T) {
t.Parallel()
q := `clamp_max(time(), 1400)`
@@ -1421,6 +1466,38 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`label_match()`, func(t *testing.T) {
t.Parallel()
q := `
label_match((
alias(time(), "foo"),
alias(2*time(), "bar"),
), "__name__", "f.+")`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("foo")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`label_mismatch()`, func(t *testing.T) {
t.Parallel()
q := `
label_mismatch((
alias(time(), "foo"),
alias(2*time(), "bar"),
), "__name__", "f.+")`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2000, 2400, 2800, 3200, 3600, 4000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("bar")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`two_timeseries`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(time() or label_set(2, "xx", "foo"))`
@@ -1520,6 +1597,48 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sort_by_label()`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label((
alias(1, "foo"),
alias(2, "bar"),
), "__name__")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("bar")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("foo")
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sort_by_label_desc()`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_desc((
alias(1, "foo"),
alias(2, "bar"),
), "__name__")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("foo")
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r2.MetricName.MetricGroup = []byte("bar")
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`a cmp scalar (leave MetricGroup)`, func(t *testing.T) {
t.Parallel()
q := `sort_desc((
@@ -1643,12 +1762,34 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`time() and time() > 1300`, func(t *testing.T) {
t.Parallel()
q := `time() and time() > 1300`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, 1400, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`time() unless 2`, func(t *testing.T) {
t.Parallel()
q := `time() unless 2`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`time() unless time() > 1500`, func(t *testing.T) {
t.Parallel()
q := `time() unless time() > 1500`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`timseries-with-tags unless 2`, func(t *testing.T) {
t.Parallel()
q := `label_set(time(), "foo", "bar") unless 2`
@@ -2263,18 +2404,36 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_share(scalar)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(123, time())`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-no-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6, label_set(100, "foo", "bar"))`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-no-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(123, label_set(100, "foo", "bar"))`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-invalid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6, label_set(100, "le", "foobar"))`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-invalid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(50, label_set(100, "le", "foobar"))`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6, label_set(100, "le", "200"))`
@@ -2286,6 +2445,39 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(80, label_set(100, "le", "200"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.4, 0.4, 0.4, 0.4, 0.4, 0.4},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(200, label_set(100, "le", "200"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(300, label_set(100, "le", "200"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le, boundsLabel)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_quantile(0.6, label_set(100, "le", "200"), "foobar"))`
@@ -2315,6 +2507,35 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le, boundsLabel)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_share(120, label_set(100, "le", "200"), "foobar"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foobar"),
Value: []byte("lower"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.6, 0.6, 0.6, 0.6, 0.6, 0.6},
Timestamps: timestampsExpected,
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("foobar"),
Value: []byte("upper"),
}}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le-max-phi)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(1, (
@@ -2329,6 +2550,20 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le-max-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(200, (
label_set(100, "le", "200"),
label_set(0, "le", "55"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le-min-phi)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0, (
@@ -2343,6 +2578,48 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le-min-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(0, (
label_set(100, "le", "200"),
label_set(0, "le", "55"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le-low-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(55, (
label_set(100, "le", "200"),
label_set(0, "le", "55"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le-mid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(105, (
label_set(100, "le", "200"),
label_set(0, "le", "55"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(single-value-valid-le-min-phi-no-zero-bucket)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0, label_set(100, "le", "200"))`
@@ -2365,6 +2642,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(scalar-phi)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(time() / 8, label_set(100, "le", "200"))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.625, 0.75, 0.875, 1, 1, 1},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(valid)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_quantile(0.6,
@@ -2395,6 +2683,36 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`histogram_share(valid)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_share(25,
label_set(90, "foo", "bar", "le", "10")
or label_set(100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
or label_set(200, "tag", "xx", "le", "10")
or label_set(300, "tag", "xx", "le", "30")
))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.325, 0.325, 0.325, 0.325, 0.325, 0.325},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("tag"),
Value: []byte("xx"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`histogram_quantile(negative-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6,
@@ -2423,7 +2741,7 @@ func TestExecSuccess(t *testing.T) {
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Values: []float64{30, 30, 30, 30, 30, 30},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
@@ -2452,6 +2770,25 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(normal-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(35,
label_set(0, "foo", "bar", "le", "10")
or label_set(100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(normal-bucket-count, boundsLabel)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_quantile(0.2,
@@ -2502,6 +2839,56 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`histogram_share(normal-bucket-count, boundsLabel)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_share(22,
label_set(0, "foo", "bar", "le", "10")
or label_set(100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf"),
"xxx"
))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xxx"),
Value: []byte("lower"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.2, 0.2, 0.2, 0.2, 0.2, 0.2},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xxx"),
Value: []byte("upper"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`histogram_quantile(zero-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6,
@@ -2631,6 +3018,23 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3, r4, r5, r6}
f(q, resultExpected)
})
t.Run(`prometheus_buckets(zero-vmrange-value)`, func(t *testing.T) {
t.Parallel()
q := `sort(prometheus_buckets(label_set(0, "vmrange", "0...0")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("le"),
Value: []byte("+Inf"),
},
}
resultsExpected := []netstorage.Result{r1}
f(q, resultsExpected)
})
t.Run(`prometheus_buckets(valid)`, func(t *testing.T) {
t.Parallel()
q := `sort(prometheus_buckets((
@@ -2925,6 +3329,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`range_over_time(time)`, func(t *testing.T) {
t.Parallel()
q := `range_over_time(alias(time()/100, "foobar")[3i])`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{4, 4, 4, 4, 4, 4},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`sum(multi-vector)`, func(t *testing.T) {
t.Parallel()
q := `sum(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss"))`
@@ -3083,6 +3498,150 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_over_time`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{14, 15, 12, 13, 15, 11},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("vmrange"),
Value: []byte("2.0e0...2.5e0"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{13, 14, 12, 8, 12, 13},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("vmrange"),
Value: []byte("1.0e0...1.5e0"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{13, 11, 16, 19, 13, 16},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("vmrange"),
Value: []byte("1.5e0...2.0e0"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`sum(histogram_over_time) by (vmrange)`, func(t *testing.T) {
t.Parallel()
q := `sort(sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{14, 15, 12, 13, 15, 11},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("vmrange"),
Value: []byte("2.0e0...2.5e0"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{13, 14, 12, 8, 12, 13},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("vmrange"),
Value: []byte("1.0e0...1.5e0"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{13, 11, 16, 19, 13, 16},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("vmrange"),
Value: []byte("1.5e0...2.0e0"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`sum(histogram_over_time)`, func(t *testing.T) {
t.Parallel()
q := `sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{40, 40, 40, 40, 40, 40},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`topk_max(histogram_over_time)`, func(t *testing.T) {
t.Parallel()
q := `topk_max(1, histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{13, 11, 16, 19, 13, 16},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("vmrange"),
Value: []byte("1.5e0...2.0e0"),
},
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`share_gt_over_time`, func(t *testing.T) {
t.Parallel()
q := `share_gt_over_time(rand(0)[200s:10s], 0.7)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.35, 0.3, 0.5, 0.3, 0.3, 0.25},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`share_le_over_time`, func(t *testing.T) {
t.Parallel()
q := `share_le_over_time(rand(0)[200s:10s], 0.7)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.65, 0.7, 0.5, 0.7, 0.7, 0.75},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`increases_over_time`, func(t *testing.T) {
t.Parallel()
q := `increases_over_time(rand(0)[200s:10s])`
@@ -3260,12 +3819,12 @@ func TestExecSuccess(t *testing.T) {
q := `sort(bottomk_avg(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, nan, nan, nan},
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
Key: []byte("baz"),
Value: []byte("sss"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
@@ -3409,6 +3968,22 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`keep_next_value()`, func(t *testing.T) {
t.Parallel()
q := `keep_next_value(label_set(time() < 1300 default time() > 1700, "__name__", "foobar", "x", "y"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1800, 1800, 1800, 2000},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("foobar")
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("x"),
Value: []byte("y"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`distinct_over_time([500s])`, func(t *testing.T) {
t.Parallel()
q := `distinct_over_time((time() < 1700)[500s])`
@@ -4045,6 +4620,112 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`hoeffding_bound_lower()`, func(t *testing.T) {
t.Parallel()
q := `hoeffding_bound_lower(0.9, rand(0)[:10s])`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.2516770508510652, 0.2830570387745462, 0.27716232108436645, 0.3679356319931767, 0.3168460474120903, 0.23156726248243734},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`hoeffding_bound_upper()`, func(t *testing.T) {
t.Parallel()
q := `hoeffding_bound_upper(0.9, alias(rand(0), "foobar")[:10s])`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.6510581320042821, 0.7261021731890429, 0.7245290097397009, 0.8113950442584258, 0.7736122275568004, 0.6658564048254882},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("foobar")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`aggr_over_time(single-func)`, func(t *testing.T) {
t.Parallel()
q := `aggr_over_time("increase", rand(0)[:10s])`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{5.465672601448873, 6.642207999066246, 6.8400051805114295, 7.182425481980655, 5.1677922402706, 6.594060518641982},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("increase"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`aggr_over_time(multi-func)`, func(t *testing.T) {
t.Parallel()
q := `sort(aggr_over_time(("min_over_time", "count_over_time", "max_over_time"), round(rand(0),0.1)[:10s]))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0, 0, 0, 0, 0},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("min_over_time"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.8, 0.9, 1, 0.9, 1, 0.9},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("max_over_time"),
}}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{20, 20, 20, 20, 20, 20},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("count_over_time"),
}}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`avg(aggr_over_time(multi-func))`, func(t *testing.T) {
t.Parallel()
q := `avg(aggr_over_time(("min_over_time", "max_over_time"), time()[:10s]))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{905, 1105, 1305, 1505, 1705, 1905},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`avg(aggr_over_time(multi-func)) by (rollup)`, func(t *testing.T) {
t.Parallel()
q := `sort(avg(aggr_over_time(("min_over_time", "max_over_time"), time()[:10s])) by (rollup))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{810, 1010, 1210, 1410, 1610, 1810},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("min_over_time"),
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("max_over_time"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`rollup_candlestick()`, func(t *testing.T) {
t.Parallel()
q := `sort(rollup_candlestick(round(rand(0),0.01)[:10s]))`
@@ -4059,21 +4740,21 @@ func TestExecSuccess(t *testing.T) {
}}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.32, 0.82, 0.13, 0.28, 0.86, 0.57},
Values: []float64{0.9, 0.32, 0.82, 0.13, 0.28, 0.86},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("close"),
Value: []byte("open"),
}}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.9, 0.32, 0.82, 0.13, 0.28, 0.86},
Values: []float64{0.1, 0.04, 0.49, 0.46, 0.57, 0.92},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{{
Key: []byte("rollup"),
Value: []byte("open"),
Value: []byte("close"),
}}
r4 := netstorage.Result{
MetricName: metricNameExpected,
@@ -4125,7 +4806,7 @@ func TestExecSuccess(t *testing.T) {
q := `sort(rollup(time()[:50s]))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{800, 1000, 1200, 1400, 1600, 1800},
Values: []float64{850, 1050, 1250, 1450, 1650, 1850},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
@@ -4645,7 +5326,7 @@ func TestExecError(t *testing.T) {
Start: 1000,
End: 2000,
Step: 100,
Deadline: netstorage.NewDeadline(time.Minute),
Deadline: netstorage.NewDeadline(time.Minute, ""),
}
for i := 0; i < 4; i++ {
rv, err := Exec(ec, q, false)
@@ -4692,11 +5373,15 @@ func TestExecError(t *testing.T) {
f(`label_set(1, "foo")`)
f(`label_del()`)
f(`label_keep()`)
f(`label_match()`)
f(`label_mismatch()`)
f(`round()`)
f(`round(1,2,3)`)
f(`scalar()`)
f(`sort(1,2)`)
f(`sort_desc()`)
f(`sort_by_label()`)
f(`sort_by_label_desc()`)
f(`timestamp()`)
f(`vector()`)
f(`histogram_quantile()`)
@@ -4739,6 +5424,7 @@ func TestExecError(t *testing.T) {
f(`median()`)
f(`median("foo", "bar")`)
f(`keep_last_value()`)
f(`keep_next_value()`)
f(`distinct_over_time()`)
f(`distinct()`)
f(`alias()`)
@@ -4746,6 +5432,18 @@ func TestExecError(t *testing.T) {
f(`alias(1, "foo", "bar")`)
f(`lifetime()`)
f(`lag()`)
f(`aggr_over_time()`)
f(`aggr_over_time(foo)`)
f(`aggr_over_time("foo", bar, 1)`)
f(`sum(aggr_over_time())`)
f(`sum(aggr_over_time(foo))`)
f(`count(aggr_over_time("foo", bar, 1))`)
f(`hoeffding_bound_lower()`)
f(`hoeffding_bound_lower(1)`)
f(`hoeffding_bound_lower(0.99, foo, 1)`)
f(`hoeffding_bound_upper()`)
f(`hoeffding_bound_upper(1)`)
f(`hoeffding_bound_upper(0.99, foo, 1)`)
// Invalid argument type
f(`median_over_time({}, 2)`)
@@ -4780,7 +5478,11 @@ func TestExecError(t *testing.T) {
f(`label_transform(1, "foo", 3, 4)`)
f(`label_transform(1, "foo", "bar", 4)`)
f(`label_transform(1, "foo", "invalid(regexp", "baz`)
f(`label_match(1, 2, 3)`)
f(`label_mismatch(1, 2, 3)`)
f(`alias(1, 2)`)
f(`aggr_over_time(1, 2)`)
f(`aggr_over_time(("foo", "bar"), 3)`)
// Duplicate timeseries
f(`(label_set(1, "foo", "bar") or label_set(2, "foo", "baz"))

View File

@@ -8,12 +8,13 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/histogram"
)
var rollupFuncs = map[string]newRollupFunc{
"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
// Standard rollup funcs from PromQL.
// See funcs accepting range-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
"changes": newRollupFuncOneArg(rollupChanges),
@@ -25,7 +26,7 @@ var rollupFuncs = map[string]newRollupFunc{
"increase": newRollupFuncOneArg(rollupIncrease), // + rollupFuncsRemoveCounterResets
"irate": newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
"predict_linear": newRollupPredictLinear,
"rate": newRollupFuncOneArg(rollupDerivIncrease), // + rollupFuncsRemoveCounterResets
"rate": newRollupFuncOneArg(rollupDerivFast), // + rollupFuncsRemoveCounterResets
"resets": newRollupFuncOneArg(rollupResets),
"avg_over_time": newRollupFuncOneArg(rollupAvg),
"min_over_time": newRollupFuncOneArg(rollupMin),
@@ -35,38 +36,98 @@ var rollupFuncs = map[string]newRollupFunc{
"quantile_over_time": newRollupQuantile,
"stddev_over_time": newRollupFuncOneArg(rollupStddev),
"stdvar_over_time": newRollupFuncOneArg(rollupStdvar),
"absent_over_time": newRollupFuncOneArg(rollupAbsent),
// Additional rollup funcs.
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
"first_over_time": newRollupFuncOneArg(rollupFirst),
"last_over_time": newRollupFuncOneArg(rollupLast),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"lag": newRollupFuncOneArg(rollupLag),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
"range_over_time": newRollupFuncOneArg(rollupRange),
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
"first_over_time": newRollupFuncOneArg(rollupFirst),
"last_over_time": newRollupFuncOneArg(rollupLast),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"lag": newRollupFuncOneArg(rollupLag),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"tmin_over_time": newRollupFuncOneArg(rollupTmin),
"tmax_over_time": newRollupFuncOneArg(rollupTmax),
"share_le_over_time": newRollupShareLE,
"share_gt_over_time": newRollupShareGT,
"histogram_over_time": newRollupFuncOneArg(rollupHistogram),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
}
var rollupFuncsMayAdjustWindow = map[string]bool{
"default_rollup": true,
"first_over_time": true,
"last_over_time": true,
"deriv": true,
"deriv_fast": true,
"irate": true,
"rate": true,
"lifetime": true,
"scrape_interval": true,
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
var rollupAggrFuncs = map[string]rollupFunc{
// Standard rollup funcs from PromQL.
"changes": rollupChanges,
"delta": rollupDelta,
"deriv": rollupDerivSlow,
"deriv_fast": rollupDerivFast,
"idelta": rollupIdelta,
"increase": rollupIncrease, // + rollupFuncsRemoveCounterResets
"irate": rollupIderiv, // + rollupFuncsRemoveCounterResets
"rate": rollupDerivFast, // + rollupFuncsRemoveCounterResets
"resets": rollupResets,
"avg_over_time": rollupAvg,
"min_over_time": rollupMin,
"max_over_time": rollupMax,
"sum_over_time": rollupSum,
"count_over_time": rollupCount,
"stddev_over_time": rollupStddev,
"stdvar_over_time": rollupStdvar,
"absent_over_time": rollupAbsent,
// Additional rollup funcs.
"range_over_time": rollupRange,
"sum2_over_time": rollupSum2,
"geomean_over_time": rollupGeomean,
"first_over_time": rollupFirst,
"last_over_time": rollupLast,
"distinct_over_time": rollupDistinct,
"increases_over_time": rollupIncreases,
"decreases_over_time": rollupDecreases,
"integrate": rollupIntegrate,
"ideriv": rollupIderiv,
"lifetime": rollupLifetime,
"lag": rollupLag,
"scrape_interval": rollupScrapeInterval,
"tmin_over_time": rollupTmin,
"tmax_over_time": rollupTmax,
}
var rollupFuncsCannotAdjustWindow = map[string]bool{
"changes": true,
"delta": true,
"holt_winters": true,
"idelta": true,
"increase": true,
"predict_linear": true,
"resets": true,
"sum_over_time": true,
"count_over_time": true,
"quantile_over_time": true,
"stddev_over_time": true,
"stdvar_over_time": true,
"absent_over_time": true,
"sum2_over_time": true,
"geomean_over_time": true,
"distinct_over_time": true,
"increases_over_time": true,
"decreases_over_time": true,
"integrate": true,
}
var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -78,13 +139,64 @@ var rollupFuncsRemoveCounterResets = map[string]bool{
}
var rollupFuncsKeepMetricGroup = map[string]bool{
"default_rollup": true,
"avg_over_time": true,
"min_over_time": true,
"max_over_time": true,
"quantile_over_time": true,
"rollup": true,
"geomean_over_time": true,
"default_rollup": true,
"avg_over_time": true,
"min_over_time": true,
"max_over_time": true,
"quantile_over_time": true,
"rollup": true,
"geomean_over_time": true,
"hoeffding_bound_lower": true,
"hoeffding_bound_upper": true,
}
func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
afe, ok := expr.(*metricsql.AggrFuncExpr)
if ok {
// This is for incremental aggregate function case:
//
// sum(aggr_over_time(...))
//
// See aggr_incremental.go for details.
expr = afe.Args[0]
}
fe, ok := expr.(*metricsql.FuncExpr)
if !ok {
logger.Panicf("BUG: unexpected expression; want metricsql.FuncExpr; got %T; value: %s", expr, expr.AppendString(nil))
}
if fe.Name != "aggr_over_time" {
logger.Panicf("BUG: unexpected function name: %q; want `aggr_over_time`", fe.Name)
}
if len(fe.Args) != 2 {
return nil, fmt.Errorf("unexpected number of args to aggr_over_time(); got %d; want %d", len(fe.Args), 2)
}
arg := fe.Args[0]
var aggrFuncNames []string
if se, ok := arg.(*metricsql.StringExpr); ok {
aggrFuncNames = append(aggrFuncNames, se.S)
} else {
fe, ok := arg.(*metricsql.FuncExpr)
if !ok || fe.Name != "" {
return nil, fmt.Errorf("%s cannot be passed to aggr_over_time(); expecting quoted aggregate function name or a list of quoted aggregate function names",
arg.AppendString(nil))
}
for _, e := range fe.Args {
se, ok := e.(*metricsql.StringExpr)
if !ok {
return nil, fmt.Errorf("%s cannot be passed here; expecting quoted aggregate function name", e.AppendString(nil))
}
aggrFuncNames = append(aggrFuncNames, se.S)
}
}
if len(aggrFuncNames) == 0 {
return nil, fmt.Errorf("aggr_over_time() must contain at least a single aggregate function name")
}
for _, s := range aggrFuncNames {
if rollupAggrFuncs[s] == nil {
return nil, fmt.Errorf("%q cannot be used in `aggr_over_time` function; expecting quoted aggregate function name", s)
}
}
return aggrFuncNames, nil
}
func getRollupArgIdx(funcName string) int {
@@ -92,10 +204,84 @@ func getRollupArgIdx(funcName string) int {
if rollupFuncs[funcName] == nil {
logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", funcName)
}
if funcName == "quantile_over_time" {
switch funcName {
case "quantile_over_time", "aggr_over_time",
"hoeffding_bound_lower", "hoeffding_bound_upper":
return 1
default:
return 0
}
return 0
}
func getRollupConfigs(name string, rf rollupFunc, expr metricsql.Expr, start, end, step, window int64, lookbackDelta int64, sharedTimestamps []int64) (
func(values []float64, timestamps []int64), []*rollupConfig, error) {
preFunc := func(values []float64, timestamps []int64) {}
if rollupFuncsRemoveCounterResets[name] {
preFunc = func(values []float64, timestamps []int64) {
removeCounterResets(values)
}
}
newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
return &rollupConfig{
TagValue: tagValue,
Func: rf,
Start: start,
End: end,
Step: step,
Window: window,
MayAdjustWindow: !rollupFuncsCannotAdjustWindow[name],
LookbackDelta: lookbackDelta,
Timestamps: sharedTimestamps,
}
}
appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
dst = append(dst, newRollupConfig(rollupMin, "min"))
dst = append(dst, newRollupConfig(rollupMax, "max"))
dst = append(dst, newRollupConfig(rollupAvg, "avg"))
return dst
}
var rcs []*rollupConfig
switch name {
case "rollup":
rcs = appendRollupConfigs(rcs)
case "rollup_rate", "rollup_deriv":
preFuncPrev := preFunc
preFunc = func(values []float64, timestamps []int64) {
preFuncPrev(values, timestamps)
derivValues(values, timestamps)
}
rcs = appendRollupConfigs(rcs)
case "rollup_increase", "rollup_delta":
preFuncPrev := preFunc
preFunc = func(values []float64, timestamps []int64) {
preFuncPrev(values, timestamps)
deltaValues(values)
}
rcs = appendRollupConfigs(rcs)
case "rollup_candlestick":
rcs = append(rcs, newRollupConfig(rollupOpen, "open"))
rcs = append(rcs, newRollupConfig(rollupClose, "close"))
rcs = append(rcs, newRollupConfig(rollupLow, "low"))
rcs = append(rcs, newRollupConfig(rollupHigh, "high"))
case "aggr_over_time":
aggrFuncNames, err := getRollupAggrFuncNames(expr)
if err != nil {
return nil, nil, fmt.Errorf("invalid args to %s: %s", expr.AppendString(nil), err)
}
for _, aggrFuncName := range aggrFuncNames {
if rollupFuncsRemoveCounterResets[aggrFuncName] {
// There is no need to save the previous preFunc, since it is either empty or the same.
preFunc = func(values []float64, timestamps []int64) {
removeCounterResets(values)
}
}
rf := rollupAggrFuncs[aggrFuncName]
rcs = append(rcs, newRollupConfig(rf, aggrFuncName))
}
default:
rcs = append(rcs, newRollupConfig(rf, ""))
}
return preFunc, rcs, nil
}
func getRollupFunc(funcName string) newRollupFunc {
@@ -112,11 +298,12 @@ type rollupFuncArg struct {
currTimestamp int64
idx int
step int64
window int64
// Real previous value even if it is located too far from the current window.
// It matches prevValue if prevValue is not nan.
realPrevValue float64
tsm *timeseriesMap
}
func (rfa *rollupFuncArg) reset() {
@@ -127,8 +314,8 @@ func (rfa *rollupFuncArg) reset() {
rfa.currTimestamp = 0
rfa.idx = 0
rfa.step = 0
rfa.window = 0
rfa.realPrevValue = nan
rfa.tsm = nil
}
// rollupFunc must return rollup value for the given rfa.
@@ -167,15 +354,74 @@ var (
// The maximum interval without previous rows.
const maxSilenceInterval = 5 * 60 * 1000
type timeseriesMap struct {
origin *timeseries
labelName string
h metrics.Histogram
m map[string]*timeseries
}
func newTimeseriesMap(funcName string, sharedTimestamps []int64, mnSrc *storage.MetricName) *timeseriesMap {
if funcName != "histogram_over_time" {
return nil
}
values := make([]float64, len(sharedTimestamps))
for i := range values {
values[i] = nan
}
var origin timeseries
origin.MetricName.CopyFrom(mnSrc)
origin.MetricName.ResetMetricGroup()
origin.Timestamps = sharedTimestamps
origin.Values = values
return &timeseriesMap{
origin: &origin,
labelName: "vmrange",
m: make(map[string]*timeseries),
}
}
func (tsm *timeseriesMap) AppendTimeseriesTo(dst []*timeseries) []*timeseries {
for _, ts := range tsm.m {
dst = append(dst, ts)
}
return dst
}
func (tsm *timeseriesMap) GetOrCreateTimeseries(labelValue string) *timeseries {
ts := tsm.m[labelValue]
if ts != nil {
return ts
}
ts = &timeseries{}
ts.CopyFromShallowTimestamps(tsm.origin)
ts.MetricName.RemoveTag(tsm.labelName)
ts.MetricName.AddTag(tsm.labelName, labelValue)
tsm.m[labelValue] = ts
return ts
}
// Do calculates rollups for the given timestamps and values, appends
// them to dstValues and returns results.
//
// rc.Timestamps are used as timestamps for dstValues.
//
// timestamps must cover time range [rc.Start - rc.Window - maxSilenceInterval ... rc.End + rc.Step].
// timestamps must cover time range [rc.Start - rc.Window - maxSilenceInterval ... rc.End].
//
// Cannot be called from concurrent goroutines.
// Do cannot be called from concurrent goroutines.
func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []int64) []float64 {
return rc.doInternal(dstValues, nil, values, timestamps)
}
// DoTimeseriesMap calculates rollups for the given timestamps and values and puts them to tsm.
func (rc *rollupConfig) DoTimeseriesMap(tsm *timeseriesMap, values []float64, timestamps []int64) {
ts := getTimeseries()
ts.Values = rc.doInternal(ts.Values[:0], tsm, values, timestamps)
putTimeseries(ts)
}
func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, values []float64, timestamps []int64) []float64 {
// Sanity checks.
if rc.Step <= 0 {
logger.Panicf("BUG: Step must be bigger than 0; got %d", rc.Step)
@@ -208,8 +454,8 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
rfa := getRollupFuncArg()
rfa.idx = 0
rfa.step = rc.Step
rfa.window = window
rfa.realPrevValue = nan
rfa.tsm = tsm
i := 0
j := 0
@@ -421,6 +667,15 @@ func newRollupFuncOneArg(rf rollupFunc) newRollupFunc {
}
}
func newRollupFuncTwoArgs(rf rollupFunc) newRollupFunc {
return func(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
return rf, nil
}
}
func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 3); err != nil {
return nil, err
@@ -529,6 +784,116 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) {
return v, k
}
func newRollupShareLE(args []interface{}) (rollupFunc, error) {
return newRollupShareFilter(args, countFilterLE)
}
func countFilterLE(values []float64, le float64) int {
n := 0
for _, v := range values {
if v <= le {
n++
}
}
return n
}
func newRollupShareGT(args []interface{}) (rollupFunc, error) {
return newRollupShareFilter(args, countFilterGT)
}
func countFilterGT(values []float64, gt float64) int {
n := 0
for _, v := range values {
if v > gt {
n++
}
}
return n
}
func newRollupShareFilter(args []interface{}, countFilter func(values []float64, limit float64) int) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
limits, err := getScalar(args[1], 1)
if err != nil {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
}
limit := limits[rfa.idx]
n := countFilter(values, limit)
return float64(n) / float64(len(values))
}
return rf, nil
}
func newRollupHoeffdingBoundLower(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
phis, err := getScalar(args[0], 0)
if err != nil {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
return avg - bound
}
return rf, nil
}
func newRollupHoeffdingBoundUpper(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
phis, err := getScalar(args[0], 0)
if err != nil {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
return avg + bound
}
return rf, nil
}
func rollupHoeffdingBoundInternal(rfa *rollupFuncArg, phis []float64) (float64, float64) {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan, nan
}
if len(values) == 1 {
return 0, values[0]
}
vMax := rollupMax(rfa)
vMin := rollupMin(rfa)
vAvg := rollupAvg(rfa)
vRange := vMax - vMin
if vRange <= 0 {
return 0, vAvg
}
phi := phis[rfa.idx]
if phi >= 1 {
return inf, vAvg
}
if phi <= 0 {
return 0, vAvg
}
// See https://en.wikipedia.org/wiki/Hoeffding%27s_inequality
// and https://www.youtube.com/watch?v=6UwcqiNsZ8U&feature=youtu.be&t=1237
bound := vRange * math.Sqrt(math.Log(1/(1-phi))/(2*float64(len(values))))
return bound, vAvg
}
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
@@ -560,6 +925,21 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
return rf, nil
}
func rollupHistogram(rfa *rollupFuncArg) float64 {
values := rfa.values
tsm := rfa.tsm
tsm.h.Reset()
for _, v := range values {
tsm.h.Update(v)
}
idx := rfa.idx
tsm.h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
ts := tsm.GetOrCreateTimeseries(vmrange)
ts.Values[idx] = float64(count)
})
return nan
}
func rollupAvg(rfa *rollupFuncArg) float64 {
// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
// since it is slower and has no significant benefits in precision.
@@ -568,7 +948,10 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return rfa.prevValue
// Do not take into account rfa.prevValue, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
var sum float64
for _, v := range values {
@@ -580,14 +963,14 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
func rollupMin(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
minValue := rfa.prevValue
values := rfa.values
if math.IsNaN(minValue) {
if len(values) == 0 {
return nan
}
minValue = values[0]
if len(values) == 0 {
// Do not take into account rfa.prevValue, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
minValue := values[0]
for _, v := range values {
if v < minValue {
minValue = v
@@ -599,14 +982,14 @@ func rollupMin(rfa *rollupFuncArg) float64 {
func rollupMax(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
maxValue := rfa.prevValue
values := rfa.values
if math.IsNaN(maxValue) {
if len(values) == 0 {
return nan
}
maxValue = values[0]
if len(values) == 0 {
// Do not take into account rfa.prevValue, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
maxValue := values[0]
for _, v := range values {
if v > maxValue {
maxValue = v
@@ -615,6 +998,44 @@ func rollupMax(rfa *rollupFuncArg) float64 {
return maxValue
}
func rollupTmin(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return nan
}
minValue := values[0]
minTimestamp := timestamps[0]
for i, v := range values {
if v < minValue {
minValue = v
minTimestamp = timestamps[i]
}
}
return float64(minTimestamp) * 1e-3
}
func rollupTmax(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return nan
}
maxValue := values[0]
maxTimestamp := timestamps[0]
for i, v := range values {
if v > maxValue {
maxValue = v
maxTimestamp = timestamps[i]
}
}
return float64(maxTimestamp) * 1e-3
}
func rollupSum(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
@@ -632,6 +1053,12 @@ func rollupSum(rfa *rollupFuncArg) float64 {
return sum
}
func rollupRange(rfa *rollupFuncArg) float64 {
max := rollupMax(rfa)
min := rollupMin(rfa)
return max - min
}
func rollupSum2(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
@@ -660,6 +1087,13 @@ func rollupGeomean(rfa *rollupFuncArg) float64 {
return math.Pow(p, 1/float64(len(values)))
}
func rollupAbsent(rfa *rollupFuncArg) float64 {
if len(rfa.values) == 0 {
return 1
}
return nan
}
func rollupCount(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
@@ -723,12 +1157,18 @@ func rollupDeltaInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
if len(values) == 0 {
return nan
}
// Assume that the previous non-existing value was 0.
if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
// Fix against removeCounterResets.
prevValue = rfa.realPrevValue
} else {
// Assume that the previous non-existing value was 0
// only if the first value is quite small.
// This should prevent from improper increase() results for os-level counters
// such as cpu time or bytes sent over the network interface.
// These counters may start long ago before the first value appears in the db.
if values[0] < 1e6 {
prevValue = 0
if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
prevValue = rfa.realPrevValue
}
} else {
prevValue = values[0]
}
}
if len(values) == 0 {
@@ -770,14 +1210,6 @@ func rollupDerivSlow(rfa *rollupFuncArg) float64 {
}
func rollupDerivFast(rfa *rollupFuncArg) float64 {
return rollupDerivFastInternal(rfa, false)
}
func rollupDerivIncrease(rfa *rollupFuncArg) float64 {
return rollupDerivFastInternal(rfa, true)
}
func rollupDerivFastInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
@@ -788,16 +1220,22 @@ func rollupDerivFastInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float
if len(values) == 0 {
return nan
}
// Assume that the value changed from 0 to the current value during rfa.window
if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
// Fix against removeCounterResets.
prevValue = rfa.realPrevValue
} else {
prevValue = 0
if len(values) == 1 {
// It is impossible to determine the duration during which the value changed
// from 0 to the current value.
// The following attempts didn't work well:
// - using scrape interval as the duration. It fails on Prometheus restarts when it
// skips scraping for the counter. This results in too high rate() value for the first point
// after Prometheus restarts.
// - using window or step as the duration. It results in too small rate() values for the first
// points of time series.
//
// So just return nan
return nan
}
prevTimestamp = timestamps[0] - rfa.window
}
if len(values) == 0 {
prevValue = values[0]
prevTimestamp = timestamps[0]
} else if len(values) == 0 {
// Assume that the value didn't change on the given interval.
return 0
}
@@ -814,8 +1252,20 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
values := rfa.values
timestamps := rfa.timestamps
if len(values) < 2 {
if len(values) == 0 || math.IsNaN(rfa.prevValue) {
// It is impossible to calculate derivative on 0 or 1 values.
if len(values) == 0 {
return nan
}
if math.IsNaN(rfa.prevValue) {
// It is impossible to determine the duration during which the value changed
// from 0 to the current value.
// The following attempts didn't work well:
// - using scrape interval as the duration. It fails on Prometheus restarts when it
// skips scraping for the counter. This results in too high rate() value for the first point
// after Prometheus restarts.
// - using window or step as the duration. It results in too small rate() values for the first
// points of time series.
//
// So just return nan
return nan
}
return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
@@ -969,17 +1419,92 @@ func rollupResets(rfa *rollupFuncArg) float64 {
return float64(n)
}
func rollupFirst(rfa *rollupFuncArg) float64 {
// See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness
v := rfa.prevValue
// getCandlestickValues returns a subset of rfa.values suitable for rollup_candlestick
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309 for details.
func getCandlestickValues(rfa *rollupFuncArg) []float64 {
currTimestamp := rfa.currTimestamp
timestamps := rfa.timestamps
for len(timestamps) > 0 && timestamps[len(timestamps)-1] >= currTimestamp {
timestamps = timestamps[:len(timestamps)-1]
}
if len(timestamps) == 0 {
return nil
}
return rfa.values[:len(timestamps)]
}
func getFirstValueForCandlestick(rfa *rollupFuncArg) float64 {
if rfa.prevTimestamp+rfa.step >= rfa.currTimestamp {
return rfa.prevValue
}
return nan
}
func rollupOpen(rfa *rollupFuncArg) float64 {
v := getFirstValueForCandlestick(rfa)
if !math.IsNaN(v) {
return v
}
values := getCandlestickValues(rfa)
if len(values) == 0 {
return nan
}
return values[0]
}
func rollupClose(rfa *rollupFuncArg) float64 {
values := getCandlestickValues(rfa)
if len(values) == 0 {
return getFirstValueForCandlestick(rfa)
}
return values[len(values)-1]
}
func rollupHigh(rfa *rollupFuncArg) float64 {
values := getCandlestickValues(rfa)
max := getFirstValueForCandlestick(rfa)
if math.IsNaN(max) {
if len(values) == 0 {
return nan
}
max = values[0]
values = values[1:]
}
for _, v := range values {
if v > max {
max = v
}
}
return max
}
func rollupLow(rfa *rollupFuncArg) float64 {
values := getCandlestickValues(rfa)
min := getFirstValueForCandlestick(rfa)
if math.IsNaN(min) {
if len(values) == 0 {
return nan
}
min = values[0]
values = values[1:]
}
for _, v := range values {
if v < min {
min = v
}
}
return min
}
func rollupFirst(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
// Do not take into account rfa.prevValue, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
return values[0]
@@ -992,7 +1517,10 @@ func rollupLast(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return rfa.prevValue
// Do not take into account rfa.prevValue, since it may lead
// to inconsistent results comparing to Prometheus on broken time series
// with irregular data points.
return nan
}
return values[len(values)-1]
}

View File

@@ -18,7 +18,12 @@ import (
"github.com/VictoriaMetrics/metrics"
)
var disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
var (
disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
cacheTimestampOffset = flag.Duration("search.cacheTimestampOffset", 5*time.Minute, "The maximum duration since the current time for response data, "+
"which is always queried from the original raw data, without using the response cache. Increase this value if you see gaps in responses "+
"due to time synchronization issues between VictoriaMetrics and data sources")
)
var rollupResultCacheV = &rollupResultCache{
c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.
@@ -74,8 +79,8 @@ func InitRollupResultCache(cachePath string) {
return stats
}
if len(rollupResultCachePath) > 0 {
logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs().EntriesCount, fcs().BytesSize)
logger.Infof("loaded rollupResult cache from %q in %.3f seconds; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime).Seconds(), fcs().EntriesCount, fcs().BytesSize)
}
metrics.NewGauge(`vm_cache_entries{type="promql/rollupResult"}`, func() float64 {
@@ -113,8 +118,8 @@ func StopRollupResultCache() {
rollupResultCacheV.c.UpdateStats(&fcs)
rollupResultCacheV.c.Stop()
rollupResultCacheV.c = nil
logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
logger.Infof("saved rollupResult cache to %q in %.3f seconds; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime).Seconds(), fcs.EntriesCount, fcs.BytesSize)
}
type rollupResultCache struct {
@@ -127,9 +132,10 @@ var rollupResultCacheResets = metrics.NewCounter(`vm_cache_resets_total{type="pr
func ResetRollupResultCache() {
rollupResultCacheResets.Inc()
rollupResultCacheV.c.Reset()
logger.Infof("rollupResult cache has been cleared")
}
func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window int64) (tss []*timeseries, newStart int64) {
func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
if *disableCache || !ec.mayCache() {
return nil, ec.Start
}
@@ -138,7 +144,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricsql
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
if len(metainfoBuf) == 0 {
return nil, ec.Start
@@ -158,7 +164,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricsql
if len(compressedResultBuf.B) == 0 {
mi.RemoveKey(key)
metainfoBuf = mi.Marshal(metainfoBuf[:0])
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
rrc.c.Set(bb.B, metainfoBuf)
return nil, ec.Start
}
@@ -210,15 +216,15 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricsql
var resultBufPool bytesutil.ByteBufferPool
func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
func (rrc *rollupResultCache) Put(ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
if *disableCache || len(tss) == 0 || !ec.mayCache() {
return
}
// Remove values up to currentTime - step - maxSilenceInterval,
// Remove values up to currentTime - step - cacheTimestampOffset,
// since these values may be added later.
timestamps := tss[0].Timestamps
deadline := (time.Now().UnixNano() / 1e6) - ec.Step - maxSilenceInterval
deadline := (time.Now().UnixNano() / 1e6) - ec.Step - cacheTimestampOffset.Milliseconds()
i := len(timestamps) - 1
for i >= 0 && timestamps[i] > deadline {
i--
@@ -261,7 +267,7 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricsql
bb.B = key.Marshal(bb.B[:0])
rrc.c.SetBig(bb.B, compressedResultBuf.B)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
var mi rollupResultCacheMetainfo
if len(metainfoBuf) > 0 {
@@ -289,24 +295,13 @@ var (
var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
// Increment this value every time the format of the cache changes.
const rollupResultCacheVersion = 6
const rollupResultCacheVersion = 7
func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64) []byte {
dst = append(dst, rollupResultCacheVersion)
if iafc == nil {
dst = append(dst, 0)
} else {
dst = append(dst, 1)
dst = iafc.ae.AppendString(dst)
}
dst = encoding.MarshalUint64(dst, uint64(len(funcName)))
dst = append(dst, funcName...)
dst = encoding.MarshalInt64(dst, window)
dst = encoding.MarshalInt64(dst, step)
tfs := toTagFilters(me.LabelFilters)
for i := range tfs {
dst = tfs[i].Marshal(dst)
}
dst = expr.AppendString(dst)
return dst
}

View File

@@ -9,7 +9,6 @@ import (
func TestRollupResultCache(t *testing.T) {
ResetRollupResultCache()
funcName := "foo"
window := int64(456)
ec := &EvalConfig{
Start: 1000,
@@ -24,15 +23,18 @@ func TestRollupResultCache(t *testing.T) {
Value: "xxx",
}},
}
iafc := &incrementalAggrFuncContext{
ae: &metricsql.AggrFuncExpr{
Name: "foobar",
},
fe := &metricsql.FuncExpr{
Name: "foo",
Args: []metricsql.Expr{me},
}
ae := &metricsql.AggrFuncExpr{
Name: "foobar",
Args: []metricsql.Expr{fe},
}
// Try obtaining an empty value.
t.Run("empty", func(t *testing.T) {
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != ec.Start {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
}
@@ -42,7 +44,7 @@ func TestRollupResultCache(t *testing.T) {
})
// Store timeseries overlapping with start
t.Run("start-overlap-no-iafc", func(t *testing.T) {
t.Run("start-overlap-no-ae", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
{
@@ -50,8 +52,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
@@ -63,7 +65,7 @@ func TestRollupResultCache(t *testing.T) {
}
testTimeseriesEqual(t, tss, tssExpected)
})
t.Run("start-overlap-with-iafc", func(t *testing.T) {
t.Run("start-overlap-with-ae", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
{
@@ -71,8 +73,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, iafc, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, iafc, window)
rollupResultCacheV.Put(ec, ae, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, ae, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
@@ -94,8 +96,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{333, 0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -113,8 +115,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -132,8 +134,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -151,8 +153,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -170,8 +172,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2, 3, 4, 5, 6, 7},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -193,8 +195,8 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{1, 2, 3, 4, 5, 6},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -218,8 +220,8 @@ func TestRollupResultCache(t *testing.T) {
}
tss = append(tss, ts)
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss)
tssResult, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -247,10 +249,10 @@ func TestRollupResultCache(t *testing.T) {
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss1)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss2)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss3)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
rollupResultCacheV.Put(ec, fe, window, tss1)
rollupResultCacheV.Put(ec, fe, window, tss2)
rollupResultCacheV.Put(ec, fe, window, tss3)
tss, newStart := rollupResultCacheV.Get(ec, fe, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}

View File

@@ -59,7 +59,7 @@ func TestRollupIderivDuplicateTimestamps(t *testing.T) {
}
n = rollupIderiv(rfa)
if n != 500 {
t.Fatalf("unexpected value; got %v; want %v", n, 0.5)
t.Fatalf("unexpected value; got %v; want %v", n, 500)
}
rfa = &rollupFuncArg{
@@ -192,6 +192,52 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
}
}
func TestRollupShareLEOverTime(t *testing.T) {
f := func(le, vExpected float64) {
t.Helper()
les := []*timeseries{{
Values: []float64{le},
Timestamps: []int64{123},
}}
var me metricsql.MetricExpr
args := []interface{}{&metricsql.RollupExpr{Expr: &me}, les}
testRollupFunc(t, "share_le_over_time", args, &me, vExpected)
}
f(-123, 0)
f(0, 0)
f(10, 0)
f(12, 0.08333333333333333)
f(30, 0.16666666666666666)
f(50, 0.75)
f(100, 0.9166666666666666)
f(123, 1)
f(1000, 1)
}
func TestRollupShareGTOverTime(t *testing.T) {
f := func(gt, vExpected float64) {
t.Helper()
gts := []*timeseries{{
Values: []float64{gt},
Timestamps: []int64{123},
}}
var me metricsql.MetricExpr
args := []interface{}{&metricsql.RollupExpr{Expr: &me}, gts}
testRollupFunc(t, "share_gt_over_time", args, &me, vExpected)
}
f(-123, 1)
f(0, 1)
f(10, 1)
f(12, 0.9166666666666666)
f(30, 0.8333333333333334)
f(50, 0.25)
f(100, 0.08333333333333333)
f(123, 0)
f(1000, 0)
}
func TestRollupQuantileOverTime(t *testing.T) {
f := func(phi, vExpected float64) {
t.Helper()
@@ -264,6 +310,48 @@ func TestRollupHoltWinters(t *testing.T) {
f(0.9, 0.9, 33.99637566941818)
}
func TestRollupHoeffdingBoundLower(t *testing.T) {
f := func(phi, vExpected float64) {
t.Helper()
phis := []*timeseries{{
Values: []float64{phi},
Timestamps: []int64{123},
}}
var me metricsql.MetricExpr
args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
testRollupFunc(t, "hoeffding_bound_lower", args, &me, vExpected)
}
f(0.5, 28.21949401521037)
f(-1, 47.083333333333336)
f(0, 47.083333333333336)
f(1, -inf)
f(2, -inf)
f(0.1, 39.72878000047643)
f(0.9, 12.701803086472331)
}
func TestRollupHoeffdingBoundUpper(t *testing.T) {
f := func(phi, vExpected float64) {
t.Helper()
phis := []*timeseries{{
Values: []float64{phi},
Timestamps: []int64{123},
}}
var me metricsql.MetricExpr
args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
testRollupFunc(t, "hoeffding_bound_upper", args, &me, vExpected)
}
f(0.5, 65.9471726514563)
f(-1, 47.083333333333336)
f(0, 47.083333333333336)
f(1, inf)
f(2, inf)
f(0.1, 54.43788666619024)
f(0.9, 81.46486358019433)
}
func TestRollupNewRollupFuncSuccess(t *testing.T) {
f := func(funcName string, vExpected float64) {
t.Helper()
@@ -276,15 +364,18 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
f("changes", 11)
f("delta", 34)
f("deriv", -266.85860231406065)
f("deriv_fast", 272)
f("deriv_fast", -712)
f("idelta", 0)
f("increase", 398)
f("irate", 0)
f("rate", 3184)
f("rate", 2200)
f("resets", 5)
f("range_over_time", 111)
f("avg_over_time", 47.083333333333336)
f("min_over_time", 12)
f("max_over_time", 123)
f("tmin_over_time", 0.08)
f("tmax_over_time", 0.005)
f("sum_over_time", 565)
f("sum2_over_time", 37951)
f("geomean_over_time", 39.33466603189148)
@@ -411,7 +502,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 123, 123, 123, 34, 34}
valuesExpected := []float64{nan, 123, nan, 34, nan, 44}
timestampsExpected := []int64{0, 5, 10, 15, 20, 25}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -425,7 +516,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{12, 44, 34, nan}
valuesExpected := []float64{44, 32, 34, nan}
timestampsExpected := []int64{100, 120, 140, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -439,7 +530,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, 123, 54, 44}
valuesExpected := []float64{nan, nan, 123, 34, 32}
timestampsExpected := []int64{-50, 0, 50, 100, 150}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -501,7 +592,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{99, 12, 44, nan, 32, 34, nan}
valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -515,7 +606,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{99, 12, 44, 44, 32, 34, nan}
valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -529,7 +620,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{34, 12, 12, 44, 44, 34, nan}
valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -546,7 +637,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 123, 21, 12, 34}
valuesExpected := []float64{nan, 123, 54, 44, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -574,7 +665,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 21, 12, 12, 34}
valuesExpected := []float64{nan, 21, 12, 32, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -784,7 +875,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, 30750, 0, -8900, 0}
valuesExpected := []float64{nan, nan, nan, 0, -8900, 0}
timestampsExpected := []int64{0, 4, 8, 12, 16, 20}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})

View File

@@ -65,9 +65,12 @@ var transformFuncs = map[string]transformFunc{
"label_move": transformLabelMove,
"label_transform": transformLabelTransform,
"label_value": transformLabelValue,
"label_match": transformLabelMatch,
"label_mismatch": transformLabelMismatch,
"union": transformUnion,
"": transformUnion, // empty func is a synonim to union
"keep_last_value": transformKeepLastValue,
"keep_next_value": transformKeepNextValue,
"start": newTransformFuncZeroArgs(transformStart),
"end": newTransformFuncZeroArgs(transformEnd),
"step": newTransformFuncZeroArgs(transformStep),
@@ -93,6 +96,9 @@ var transformFuncs = map[string]transformFunc{
"asin": newTransformFuncOneArg(transformAsin),
"acos": newTransformFuncOneArg(transformAcos),
"prometheus_buckets": transformPrometheusBuckets,
"histogram_share": transformHistogramShare,
"sort_by_label": newTransformFuncSortByLabel(false),
"sort_by_label_desc": newTransformFuncSortByLabel(true),
}
func getTransformFunc(s string) transformFunc {
@@ -146,29 +152,10 @@ func transformAbsent(tfa *transformFuncArg) ([]*timeseries, error) {
return nil, err
}
arg := args[0]
if len(arg) == 0 {
// Copy tags from arg
rvs := evalNumber(tfa.ec, 1)
rv := rvs[0]
me, ok := tfa.fe.Args[0].(*metricsql.MetricExpr)
if !ok {
return rvs, nil
}
tfs := toTagFilters(me.LabelFilters)
for i := range tfs {
tf := &tfs[i]
if len(tf.Key) == 0 {
continue
}
if tf.IsRegexp || tf.IsNegative {
continue
}
rv.MetricName.AddTagBytes(tf.Key, tf.Value)
}
rvs := getAbsentTimeseries(tfa.ec, tfa.fe.Args[0])
return rvs, nil
}
for _, ts := range arg {
ts.MetricName.ResetMetricGroup()
for i, v := range ts.Values {
@@ -183,6 +170,28 @@ func transformAbsent(tfa *transformFuncArg) ([]*timeseries, error) {
return arg, nil
}
func getAbsentTimeseries(ec *EvalConfig, arg metricsql.Expr) []*timeseries {
// Copy tags from arg
rvs := evalNumber(ec, 1)
rv := rvs[0]
me, ok := arg.(*metricsql.MetricExpr)
if !ok {
return rvs
}
tfs := toTagFilters(me.LabelFilters)
for i := range tfs {
tf := &tfs[i]
if len(tf.Key) == 0 {
continue
}
if tf.IsRegexp || tf.IsNegative {
continue
}
rv.MetricName.AddTagBytes(tf.Key, tf.Value)
}
return rvs
}
func transformCeil(v float64) float64 {
return math.Ceil(v)
}
@@ -357,6 +366,7 @@ func vmrangeBucketsToLE(tss []*timeseries) []*timeseries {
ts := xs.ts
if isZeroTS(ts) {
// Skip time series with zeros. They are substituted by xssNew below.
xsPrev = xs
continue
}
if xs.start != xsPrev.end {
@@ -396,6 +406,105 @@ func vmrangeBucketsToLE(tss []*timeseries) []*timeseries {
return rvs
}
func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 2 || len(args) > 3 {
return nil, fmt.Errorf("unexpected number of args; got %d; want 2...3", len(args))
}
les, err := getScalar(args[0], 0)
if err != nil {
return nil, fmt.Errorf("cannot parse le: %s", err)
}
// Convert buckets with `vmrange` labels to buckets with `le` labels.
tss := vmrangeBucketsToLE(args[1])
// Parse boundsLabel. See https://github.com/prometheus/prometheus/issues/5706 for details.
var boundsLabel string
if len(args) > 2 {
s, err := getString(args[2], 2)
if err != nil {
return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %s", err)
}
boundsLabel = s
}
// Group metrics by all tags excluding "le"
m := groupLeTimeseries(tss)
// Calculate share for les
share := func(i int, les []float64, xss []leTimeseries) (q, lower, upper float64) {
leReq := les[i]
if math.IsNaN(leReq) || len(xss) == 0 {
return nan, nan, nan
}
fixBrokenBuckets(i, xss)
if leReq < 0 {
return 0, 0, 0
}
if math.IsInf(leReq, 1) {
return 1, 1, 1
}
var vPrev, lePrev float64
for _, xs := range xss {
v := xs.ts.Values[i]
le := xs.le
if leReq >= le {
vPrev = v
lePrev = le
continue
}
// precondition: lePrev <= leReq < le
vLast := xss[len(xss)-1].ts.Values[i]
lower = vPrev / vLast
if math.IsInf(le, 1) {
return lower, lower, 1
}
if lePrev == leReq {
return lower, lower, lower
}
upper = v / vLast
q = lower + (v-vPrev)/vLast*(leReq-lePrev)/(le-lePrev)
return q, lower, upper
}
// precondition: leReq > leLast
return 1, 1, 1
}
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
})
dst := xss[0].ts
var tsLower, tsUpper *timeseries
if len(boundsLabel) > 0 {
tsLower = &timeseries{}
tsLower.CopyFromShallowTimestamps(dst)
tsLower.MetricName.RemoveTag(boundsLabel)
tsLower.MetricName.AddTag(boundsLabel, "lower")
tsUpper = &timeseries{}
tsUpper.CopyFromShallowTimestamps(dst)
tsUpper.MetricName.RemoveTag(boundsLabel)
tsUpper.MetricName.AddTag(boundsLabel, "upper")
}
for i := range dst.Values {
q, lower, upper := share(i, les, xss)
dst.Values[i] = q
if len(boundsLabel) > 0 {
tsLower.Values[i] = lower
tsUpper.Values[i] = upper
}
}
rvs = append(rvs, dst)
if len(boundsLabel) > 0 {
rvs = append(rvs, tsLower)
rvs = append(rvs, tsUpper)
}
}
return rvs, nil
}
func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 2 || len(args) > 3 {
@@ -420,73 +529,35 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
}
// Group metrics by all tags excluding "le"
type x struct {
le float64
ts *timeseries
}
m := make(map[string][]x)
bb := bbPool.Get()
for _, ts := range tss {
tagValue := ts.MetricName.GetTagValue("le")
if len(tagValue) == 0 {
continue
}
le, err := strconv.ParseFloat(bytesutil.ToUnsafeString(tagValue), 64)
if err != nil {
continue
}
ts.MetricName.ResetMetricGroup()
ts.MetricName.RemoveTag("le")
bb.B = marshalMetricTagsSorted(bb.B[:0], &ts.MetricName)
m[string(bb.B)] = append(m[string(bb.B)], x{
le: le,
ts: ts,
})
}
bbPool.Put(bb)
m := groupLeTimeseries(tss)
// Calculate quantile for each group in m
lastNonInf := func(i int, xss []x) float64 {
lastNonInf := func(i int, xss []leTimeseries) float64 {
for len(xss) > 0 {
xsLast := xss[len(xss)-1]
v := xsLast.ts.Values[i]
if v == 0 {
return nan
}
if !math.IsNaN(v) && !math.IsInf(xsLast.le, 0) {
if !math.IsInf(xsLast.le, 0) {
return xsLast.le
}
xss = xss[:len(xss)-1]
}
return nan
}
quantile := func(i int, phis []float64, xss []x) (q, lower, upper float64) {
quantile := func(i int, phis []float64, xss []leTimeseries) (q, lower, upper float64) {
phi := phis[i]
if math.IsNaN(phi) {
return nan, nan, nan
}
// Fix broken buckets.
// They are already sorted by le, so their values must be in ascending order,
// since the next bucket value includes all the previous buckets.
vPrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if v < vPrev {
xs.ts.Values[i] = vPrev
} else if !math.IsNaN(v) {
vPrev = v
}
}
vLast := nan
for len(xss) > 0 {
fixBrokenBuckets(i, xss)
vLast := float64(0)
if len(xss) > 0 {
vLast = xss[len(xss)-1].ts.Values[i]
if !math.IsNaN(vLast) {
break
}
xss = xss[:len(xss)-1]
}
if vLast == 0 || math.IsNaN(vLast) {
if vLast == 0 {
return nan, nan, nan
}
if phi < 0 {
@@ -496,15 +567,10 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
return inf, vLast, inf
}
vReq := vLast * phi
vPrev = 0
vPrev := float64(0)
lePrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if math.IsNaN(v) {
// Skip NaNs - they may appear if the selected time range
// contains multiple different bucket sets.
continue
}
le := xs.le
if v <= 0 {
// Skip zero buckets.
@@ -563,6 +629,50 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
type leTimeseries struct {
le float64
ts *timeseries
}
func groupLeTimeseries(tss []*timeseries) map[string][]leTimeseries {
m := make(map[string][]leTimeseries)
bb := bbPool.Get()
for _, ts := range tss {
tagValue := ts.MetricName.GetTagValue("le")
if len(tagValue) == 0 {
continue
}
le, err := strconv.ParseFloat(bytesutil.ToUnsafeString(tagValue), 64)
if err != nil {
continue
}
ts.MetricName.ResetMetricGroup()
ts.MetricName.RemoveTag("le")
bb.B = marshalMetricTagsSorted(bb.B[:0], &ts.MetricName)
m[string(bb.B)] = append(m[string(bb.B)], leTimeseries{
le: le,
ts: ts,
})
}
bbPool.Put(bb)
return m
}
func fixBrokenBuckets(i int, xss []leTimeseries) {
// Fix broken buckets.
// They are already sorted by le, so their values must be in ascending order,
// since the next bucket includes all the previous buckets.
vPrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if v < vPrev || math.IsNaN(v) {
xs.ts.Values[i] = vPrev
} else {
vPrev = v
}
}
}
func transformHour(t time.Time) int {
return t.Hour()
}
@@ -617,13 +727,37 @@ func transformKeepLastValue(tfa *transformFuncArg) ([]*timeseries, error) {
if len(values) == 0 {
continue
}
prevValue := values[0]
lastValue := values[0]
for i, v := range values {
if math.IsNaN(v) {
v = prevValue
if !math.IsNaN(v) {
lastValue = v
continue
}
values[i] = v
prevValue = v
values[i] = lastValue
}
}
return rvs, nil
}
func transformKeepNextValue(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
rvs := args[0]
for _, ts := range rvs {
values := ts.Values
if len(values) == 0 {
continue
}
nextValue := values[len(values)-1]
for i := len(values) - 1; i >= 0; i-- {
v := values[i]
if !math.IsNaN(v) {
nextValue = v
continue
}
values[i] = nextValue
}
}
return rvs, nil
@@ -688,10 +822,6 @@ func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
hf.Reset()
lastIdx := -1
values := ts.Values
if len(values) > 0 {
// Ignore the last value. See Exec func for details.
values = values[:len(values)-1]
}
for i, v := range values {
if math.IsNaN(v) {
continue
@@ -742,14 +872,7 @@ func transformRangeLast(tfa *transformFuncArg) ([]*timeseries, error) {
func setLastValues(tss []*timeseries) {
for _, ts := range tss {
values := ts.Values
if len(values) < 2 {
continue
}
// Do not take into account the last value, since it shouldn't be included
// in the range. See Exec func for details.
values = values[:len(values)-1]
values = skipTrailingNaNs(values)
values := skipTrailingNaNs(ts.Values)
if len(values) == 0 {
continue
}
@@ -1099,6 +1222,62 @@ func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
func transformLabelMatch(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 3); err != nil {
return nil, err
}
labelName, err := getString(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot get label name: %s", err)
}
labelRe, err := getString(args[2], 2)
if err != nil {
return nil, fmt.Errorf("cannot get regexp: %s", err)
}
r, err := metricsql.CompileRegexpAnchored(labelRe)
if err != nil {
return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
}
tss := args[0]
rvs := tss[:0]
for _, ts := range tss {
labelValue := ts.MetricName.GetTagValue(labelName)
if r.Match(labelValue) {
rvs = append(rvs, ts)
}
}
return rvs, nil
}
func transformLabelMismatch(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 3); err != nil {
return nil, err
}
labelName, err := getString(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot get label name: %s", err)
}
labelRe, err := getString(args[2], 2)
if err != nil {
return nil, fmt.Errorf("cannot get regexp: %s", err)
}
r, err := metricsql.CompileRegexpAnchored(labelRe)
if err != nil {
return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
}
tss := args[0]
rvs := tss[:0]
for _, ts := range tss {
labelValue := ts.MetricName.GetTagValue(labelName)
if !r.Match(labelValue) {
rvs = append(rvs, ts)
}
}
return rvs, nil
}
func transformLn(v float64) float64 {
return math.Log(v)
}
@@ -1178,6 +1357,29 @@ func transformScalar(tfa *transformFuncArg) ([]*timeseries, error) {
return arg, nil
}
func newTransformFuncSortByLabel(isDesc bool) transformFunc {
return func(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 2); err != nil {
return nil, err
}
label, err := getString(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot parse label name for sorting: %s", err)
}
rvs := args[0]
sort.SliceStable(rvs, func(i, j int) bool {
a := rvs[i].MetricName.GetTagValue(label)
b := rvs[j].MetricName.GetTagValue(label)
if isDesc {
return string(b) < string(a)
}
return string(a) < string(b)
})
return rvs, nil
}
}
func newTransformFuncSort(isDesc bool) transformFunc {
return func(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
@@ -1190,7 +1392,7 @@ func newTransformFuncSort(isDesc bool) transformFunc {
b := rvs[j].Values
n := len(a) - 1
for n >= 0 {
if !math.IsNaN(a[n]) && !math.IsNaN(b[n]) {
if !math.IsNaN(a[n]) && !math.IsNaN(b[n]) && a[n] != b[n] {
break
}
n--
@@ -1198,11 +1400,10 @@ func newTransformFuncSort(isDesc bool) transformFunc {
if n < 0 {
return false
}
cmp := a[n] < b[n]
if isDesc {
cmp = !cmp
return b[n] < a[n]
}
return cmp
return a[n] < b[n]
})
return rvs, nil
}
@@ -1333,9 +1534,7 @@ func transformStart(tfa *transformFuncArg) float64 {
}
func transformEnd(tfa *transformFuncArg) float64 {
// Subtract step from end, since it shouldn't go to the range.
// See Exec func for details.
return float64(tfa.ec.End-tfa.ec.Step) * 1e-3
return float64(tfa.ec.End) * 1e-3
}
// copyTimeseriesMetricNames returns a copy of arg with real copy of MetricNames,

View File

@@ -62,8 +62,8 @@ func InitWithoutMetrics() {
blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
rowsCount := tm.SmallRowsCount + tm.BigRowsCount
sizeBytes := tm.SmallSizeBytes + tm.BigSizeBytes
logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
*DataPath, time.Since(startTime), partsCount, blocksCount, rowsCount, sizeBytes)
logger.Infof("successfully opened storage %q in %.3f seconds; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
*DataPath, time.Since(startTime).Seconds(), partsCount, blocksCount, rowsCount, sizeBytes)
}
// Storage is a storage.
@@ -133,7 +133,7 @@ func Stop() {
startTime := time.Now()
WG.WaitAndBlock()
Storage.MustClose()
logger.Infof("successfully closed the storage in %s", time.Since(startTime))
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
logger.Infof("the storage has been stopped")
}
@@ -374,6 +374,10 @@ func registerStorageMetrics() {
return float64(idbm().SizeBytes)
})
metrics.NewGauge(`vm_deduplicated_samples_total{type="merge"}`, func() float64 {
return float64(m().DedupsDuringMerge)
})
metrics.NewGauge(`vm_rows_ignored_total{reason="big_timestamp"}`, func() float64 {
return float64(m().TooBigTimestampRows)
})
@@ -461,6 +465,9 @@ func registerStorageMetrics() {
metrics.NewGauge(`vm_cache_entries{type="storage/regexps"}`, func() float64 {
return float64(storage.RegexpCacheSize())
})
metrics.NewGauge(`vm_cache_size_entries{type="storage/prefetchedMetricIDs"}`, func() float64 {
return float64(m().PrefetchedMetricIDsSize)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/tsid"}`, func() float64 {
return float64(m().TSIDCacheSizeBytes)
@@ -483,6 +490,9 @@ func registerStorageMetrics() {
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/uselessTagFilters"}`, func() float64 {
return float64(idbm().UselessTagFiltersCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/prefetchedMetricIDs"}`, func() float64 {
return float64(m().PrefetchedMetricIDsSizeBytes)
})
metrics.NewGauge(`vm_cache_requests_total{type="storage/tsid"}`, func() float64 {
return float64(m().TSIDCacheRequests)

View File

@@ -14,7 +14,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.5.0"
"version": "6.5.2"
},
{
"type": "panel",
@@ -60,12 +60,12 @@
}
]
},
"description": "Overview for single node VictoriaMetrics v1.30.3 or higher",
"description": "Overview for single node VictoriaMetrics v1.32.8 or higher",
"editable": true,
"gnetId": 10229,
"graphTooltip": 0,
"id": null,
"iteration": 1575825261972,
"iteration": 1580634216692,
"links": [
{
"icon": "doc",
@@ -1565,7 +1565,7 @@
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"description": "How many datapoints are in RAM queue waiting to be written into storage. The less is better.",
"description": "How many datapoints are in RAM queue waiting to be written into storage. The number of pending data points should be in the range from 0 to `2*<ingestion_rate>`, since VictoriaMetrics pushes pending data to persistent storage every second.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2930,7 +2930,7 @@
]
},
"time": {
"from": "now-1h",
"from": "now-30m",
"to": "now"
},
"timepicker": {
@@ -2961,5 +2961,5 @@
"timezone": "",
"title": "VictoriaMetrics",
"uid": "wNf0q_kZk",
"version": 1
"version": 2
}

View File

@@ -1,7 +1,7 @@
# All these commands must run from repository root.
DOCKER_NAMESPACE := docker.io/victoriametrics
BUILDER_IMAGE := local/builder:go1.13.5
BUILDER_IMAGE := local/builder:go1.14.0
CERTS_IMAGE := local/certs:1.0.3
package-certs:

View File

@@ -1,2 +1,2 @@
FROM golang:1.13.5
FROM golang:1.14.0
STOPSIGNAL SIGINT

View File

@@ -2,7 +2,7 @@ version: '3.5'
services:
prometheus:
container_name: prometheus
image: prom/prometheus:v2.14.0
image: prom/prometheus:v2.15.2
depends_on:
- "victoriametrics"
ports:
@@ -35,7 +35,7 @@ services:
restart: always
grafana:
container_name: grafana
image: grafana/grafana:6.5.0
image: grafana/grafana:6.5.2
entrypoint: >
/bin/sh -c "
cd /var/lib/grafana &&

View File

@@ -20,3 +20,4 @@
* [Evaluation performance and correctness: VictoriaMetrics response](https://medium.com/@valyala/evaluating-performance-and-correctness-victoriametrics-response-e27315627e87)
* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
* [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)

View File

@@ -1,8 +1,28 @@
## Case studies
## Case studies and talks
Below are approved public case studies from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
and feel free asking for references, reviews and additional case studies from real VictoriaMetrics users there.
### Adidas
See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk at [PromCon 2019](https://promcon.io/2019-munich/).
VictoriaMetrics is compared to Thanos, Corex and M3DB in the talk.
### COLOPL
[COLOPL](http://www.colopl.co.jp/en/) is Japaneese Game Development company. It started using VictoriaMetrics
after evaulating the following remote storage solutions for Prometheus:
* Cortex
* Thanos
* M3DB
* VictoriaMetrics
See [slides](https://speakerdeck.com/inletorder/monitoring-platform-with-victoria-metrics) and [video](https://www.youtube.com/watch?v=hUpHIluxw80)
from `Large-scale, super-load system monitoring platform built with VictoriaMetrics` talk at [Prometheus Meetup Tokyo #3](https://prometheus.connpass.com/event/157721/).
### Wix.com
@@ -30,7 +50,7 @@ Numbers:
* Enough head room/scaling capacity for future growth, up to 100M active time series.
* Ability to split DB replicas per workload. Alert queries go to one replica, user queries go to another (speed for users, effective cache).
> Optimizing for those points and our specific workload VictoriaMetrics proved to be the best option. As an icing on a cake weve got [PromQL extensions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) - `default 0` and `histogram` are my favorite ones, for example. What we specially like is having a lot of tsdb params easily available via config options, that makes tsdb easy to tune for specific use case. Also worth noting is a great community in [Slack channel](http://slack.victoriametrics.com/) and of course maintainer support.
> Optimizing for those points and our specific workload VictoriaMetrics proved to be the best option. As an icing on a cake weve got [PromQL extensions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) - `default 0` and `histogram` are my favorite ones, for example. What we specially like is having a lot of tsdb params easily available via config options, that makes tsdb easy to tune for specific use case. Also worth noting is a great community in [Slack channel](http://slack.victoriametrics.com/) and of course maintainer support.
Alex Ulstein, Head of Monitoring, Wix.com

View File

@@ -3,7 +3,7 @@
VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.
It is recommended using [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of cluster version
for ingestion rates lower than 10 million of data points per second.
for ingestion rates lower than a million of data points per second.
Single-node version [scales perfectly](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
with the number of CPU cores, RAM and available storage space.
Single-node version is easier to configure and operate comparing to cluster version, so think twice before sticking to cluster version.
@@ -133,6 +133,7 @@ with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafan
- `<suffix>` may have the following values:
- `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
- `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
- `opentsdb/api/put` - for accepting [OpenTSDB HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html).
- `prometheus/api/v1/import` - for importing data obtained via `api/v1/export` on `vmselect` (see below).
* URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:

View File

@@ -1,90 +1,3 @@
# MetricsQL
VictoriaMetrics implements MetricsQL - query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
It is backwards compatible with PromQL, so Grafana dashboards backed by Prometheus datasource should work the same after switching from Prometheus to VictoriaMetrics.
The following functionality is implemented differently in MetricsQL comparing to PromQL in order to improve user experience:
* MetricsQL takes into account the previous point before the window in square brackets for range functions such as `rate` and `increase`.
It also doesn't extrapolate range function results. This addresses [this issue from Prometheus](https://github.com/prometheus/prometheus/issues/3746).
* MetricsQL returns the expected non-empty responses for requests with `step` values smaller than scrape interval. This addresses [this issue from Grafana](https://github.com/grafana/grafana/issues/11451).
* MetricsQL treats `scalar` type the same as `instant vector` without labels, since subtle difference between these types usually confuses users.
See [the corresponding Prometheus docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#expression-language-data-types) for details.
Other PromQL functionality should work the same in MetricsQL. [File an issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues)
if you notice discrepancies between PromQL and MetricsQL results other than mentioned above.
MetricsQL provides additional functionality mentioned below, which is aimed towards solving practical cases.
Feel free [filing a feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you think MetricsQL misses certain useful functionality.
*Note that the functionality mentioned below doesn't work in PromQL, so it is impossible switching back to Prometheus after you start using it.*
This functionality can be tried at [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).
- [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it.
- Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char.
- `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana.
For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps.
- `offset` may be put anywere in the query. For instance, `sum(foo) offset 24h`.
- `offset` may be negative. For example, `q offset -1h`.
- `default` binary operator. `q1 default q2` substitutes `NaN` values from `q1` with the corresponding values from `q2`.
- `histogram_quantile` accepts optional third arg - `boundsLabel`. In this case it returns `lower` and `upper` bounds for the estimated percentile. See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
- `if` binary operator. `q1 if q2` removes values from `q1` for `NaN` values from `q2`.
- `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for non-`NaN` values from `q2`.
- Trailing commas on all the lists are allowed - label filters, function args and with expressions. For instance, the following queries are valid: `m{foo="bar",}`, `f(a, b,)`, `WITH (x=y,) x`. This simplifies maintenance of multi-line queries.
- String literals may be concatenated. This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`.
- Range duration in functions such as [rate](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate()) may be omitted. VictoriaMetrics automatically selects range duration depending on the current step used for building the graph. For instance, the following query is valid in VictoriaMetrics: `rate(node_network_receive_bytes_total)`.
- [Range duration](https://prometheus.io/docs/prometheus/latest/querying/basics/#range-vector-selectors) and [offset](https://prometheus.io/docs/prometheus/latest/querying/basics/#offset-modifier) may be fractional. For instance, `rate(node_network_receive_bytes_total[1.5m] offset 0.5d)`.
- Comments starting with `#` and ending with newline. For instance, `up # this is a comment for 'up' metric`.
- Rollup functions - `rollup(m[d])`, `rollup_rate(m[d])`, `rollup_deriv(m[d])`, `rollup_increase(m[d])`, `rollup_delta(m[d])` - return `min`, `max` and `avg`
values for all the `m` data points over `d` duration.
- `rollup_candlestick(m[d])` - returns `open`, `close`, `low` and `high` values (OHLC) for all the `m` data points over `d` duration. This function is useful for financial applications.
- `union(q1, ... qN)` function for building multiple graphs for `q1`, ... `qN` subqueries with a single query. The `union` function name may be skipped -
the following queries are equivalent: `union(q1, q2)` and `(q1, q2)`.
- `ru(freeResources, maxResources)` function for returning resource utilization percentage in the range `0% - 100%`. For instance, `ru(node_memory_MemFree_bytes, node_memory_MemTotal_bytes)` returns memory utilization over [node_exporter](https://github.com/prometheus/node_exporter) metrics.
- `ttf(slowlyChangingFreeResources)` function for returning the time in seconds when the given `slowlyChangingFreeResources` expression reaches zero. For instance, `ttf(node_filesystem_avail_byte)` returns the time to storage space exhaustion. This function may be useful for capacity planning.
- Functions for label manipulation:
- `alias(q, name)` for setting metric name across all the time series `q`.
- `label_set(q, label1, value1, ... labelN, valueN)` for setting the given values for the given labels on `q`.
- `label_del(q, label1, ... labelN)` for deleting the given labels from `q`.
- `label_keep(q, label1, ... labelN)` for deleting all the labels except the given labels from `q`.
- `label_copy(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for copying label values from `src_*` to `dst_*`.
- `label_move(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for moving label values from `src_*` to `dst_*`.
- `label_transform(q, label, regexp, replacement)` for replacing all the `regexp` occurences with `replacement` in the `label` values from `q`.
- `label_value(q, label)` - returns numeric values for the given `label` from `q`.
- `step()` function for returning the step in seconds used in the query.
- `start()` and `end()` functions for returning the start and end timestamps of the `[start ... end]` range used in the query.
- `integrate(m[d])` for returning integral over the given duration `d` for the given metric `m`.
- `ideriv(m)` - for calculating `instant` derivative for `m`.
- `deriv_fast(m[d])` - for calculating `fast` derivative for `m` based on the first and the last points from duration `d`.
- `running_` functions - `running_sum`, `running_min`, `running_max`, `running_avg` - for calculating [running values](https://en.wikipedia.org/wiki/Running_total) on the selected time range.
- `range_` functions - `range_sum`, `range_min`, `range_max`, `range_avg`, `range_first`, `range_last`, `range_median`, `range_quantile` - for calculating global value over the selected time range.
- `smooth_exponential(q, sf)` - smooths `q` using [exponential moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) with the given smooth factor `sf`.
- `remove_resets(q)` - removes counter resets from `q`.
- `lag(q[d])` - returns lag between the current timestamp and the timestamp from the previous data point in `q` over `d`.
- `lifetime(q[d])` - returns lifetime of `q` over `d` in seconds. It is expected that `d` exceeds the lifetime of `q`.
- `scrape_interval(q[d])` - returns the average interval in seconds between data points of `q` over `d` aka `scrape interval`.
- Trigonometric functions - `sin(q)`, `cos(q)`, `asin(q)`, `acos(q)` and `pi()`.
- `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`.
- `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`.
- `limitk(k, q)` - limits the number of time series returned from `q` to `k`.
- `keep_last_value(q)` - fills missing data (gaps) in `q` with the previous value.
- `distinct_over_time(m[d])` - returns distinct number of values for `m` data points over `d` duration.
- `distinct(q)` - returns a time series with the number of unique values for each timestamp in `q`.
- `sum2_over_time(m[d])` - returns sum of squares for all the `m` values over `d` duration.
- `sum2(q)` - returns a time series with sum of square values for each timestamp in `q`.
- `geomean_over_time(m[d])` - returns [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for all the `m` value over `d` duration.
- `geomean(q)` - returns a time series with [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for each timestamp in `q`.
- `rand()`, `rand_normal()` and `rand_exponential()` functions - for generating pseudo-random series with even, normal and exponential distribution.
- `increases_over_time(m[d])` and `decreases_over_time(m[d])` - returns the number of `m` increases or decreases over the given duration `d`.
- `prometheus_buckets(q)` - converts [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) buckets to Prometheus buckets with `le` labels.
- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details.
- `topk_*` and `bottomk_*` aggregate functions, which return up to K time series. Note that the standard `topk` function may return more than K time series -
see [this article](https://www.robustperception.io/graph-top-n-time-series-in-grafana) for details.
- `topk_min(k, q)` - returns top K time series with the max minimums on the given time range
- `topk_max(k, q)` - returns top K time series with the max maximums on the given time range
- `topk_avg(k, q)` - returns top K time series with the max averages on the given time range
- `topk_median(k, q)` - returns top K time series with the max medians on the given time range
- `bottomk_min(k, q)` - returns bottom K time series with the min minimums on the given time range
- `bottomk_max(k, q)` - returns bottom K time series with the min maximums on the given time range
- `bottomk_avg(k, q)` - returns bottom K time series with the min averages on the given time range
- `bottomk_median(k, q)` - returns bottom K time series with the min medians on the given time range
The page has been moved to [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL).

View File

@@ -8,7 +8,7 @@ To provide the best long-term [remote storage](https://prometheus.io/docs/operat
### Which features does VictoriaMetrics have?
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL).
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
[Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
@@ -16,7 +16,7 @@ To provide the best long-term [remote storage](https://prometheus.io/docs/operat
* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
may be crammed into a limited storage comparing to TimescaleDB.
* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
and [comparing Thanos to VictoriaMetrics](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
* Easy operation:
@@ -54,14 +54,56 @@ Yes. Prometheus continues writing data to local storage after enabling remote st
and new data is available for querying via Prometheus as usual.
### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?
### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?
VictoriaMetrics is simpler, faster, more cost-effective and it provides [useful extensions for PromQL](ExtendedPromQL). The simplicity is twofold:
- It is simpler to configure and operate. There is no need in configuring third-party [sidecars](https://github.com/improbable-eng/thanos/blob/master/docs/components/sidecar.md)
or fighting with [gossip protocol](https://github.com/improbable-eng/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md).
- VictoriaMetrics has simpler architecture, which means less bugs and more useful features in a long run comparing to competing TSDBs.
VictoriaMetrics is simpler, faster, more cost-effective and it provides [MetricsQL with useful extensions for PromQL](MetricsQL). The simplicity is twofold:
- It is simpler to configure and operate. There is no need in configuring third-party [sidecars](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md)
or fighting with [gossip protocol](https://github.com/thanos-io/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md).
- VictoriaMetrics has simpler architecture, which means less bugs and more useful features in the long run comparing to competing TSDBs.
See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
VictoriaMetrics also [uses less RAM than Thanos components](https://github.com/thanos-io/thanos/issues/448).
### What is the difference between VictoriaMetrics and [Cortex](https://github.com/cortexproject/cortex)?
VictoriaMetrics is similar to Cortex in the following aspects:
- Both systems accept data from Prometheus via standard [remote_write API](https://prometheus.io/docs/practices/remote_write/),
i.e. there is no need in running sidecars unlike in [Thanos](https://github.com/thanos-io/thanos) case.
- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format).
The main differences between Corex and VictoriaMetrics:
- Cortex re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
- Cortex provides [Ruler](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ruler) and [Alertmanager](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#alertmanager) components,
which are currently missing in VictoriaMetrics. However, these components can be substituted by [Promxy](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
- Cortex heavily relies on third-party services such as Consul, Memcache, DynamoDB, BigTable, Cassandra, etc.
This may increase operational complexity and reduce system reliability comparing to VictoriaMetrics' case,
which doesn't use any external services. Compare [Cortex Architecture](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md)
to [VictoriaMetrics architecture](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#architecture-overview).
- VictoriaMetrics provides [production-ready single-node solution](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md),
which is much easier to setup and operate than Cortex cluster.
- Cortex may lose up to 12 hours of recent data on Ingestor failure - see [the corresponding docs](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ingesters-failure-and-data-loss).
VictoriaMetrics may lose only a few seconds of recent data, which isn't synced to persistent storage yet.
See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
- Cortex is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
### What is the difference between VictoriaMetrics and [Thanos](https://github.com/thanos-io/thanos)?
- Thanos re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
- Thanos provides [Ruler component](https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md),
while VictoriaMetrics relies on [Promxy for alerting and recording rules](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
- VictoriaMetrics accepts data via [standard remote_write API for Prometheus](https://prometheus.io/docs/practices/remote_write/),
while Thanos uses non-standard [Sidecar](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md), which must run alongside each Prometheus instance.
- Thanos Sidecar requires disabling data compaction in Prometheus, which may hurt Prometheus performance and increase RAM usage.
- Thanos stores data on object storage (Amazon S3 or Google GCS), while VictoriaMetrics stores data on block storage (GCP persistent disks, Amazon EBS or bare metal HDD).
- Thanos may lose up to 2 hours of recent data, which wasn't uploaded yet to object storage. VictoriaMetrics may lose only a few seconds of recent data,
which isn't synced to persistent storage yet. See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
- Thanos may be harder to setup and operate comparing to VictoriaMetrics, since it has more moving parts, which can be connected with less reliable networks.
See [this article for details](https://medium.com/faun/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
- Thanos is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
### How does VictoriaMetrics compare to [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/)?
@@ -76,12 +118,13 @@ TimescaleDB insists on using SQL as a query language. While SQL is more powerful
Additionally, VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data.
### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex)?
### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex)?
No. VictoriaMetrics core is written in Go from scratch by [fasthttp](https://github.com/valyala/fasthttp) [author](https://github.com/valyala).
The architecture is [optimized for storing and querying large amounts of time series data with high cardinality](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac). VictoriaMetrics storage uses [certain ideas from ClickHouse](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). Special thanks to [Alexey Milovidov](https://github.com/alexey-milovidov).
### Are there performance comparisons with other solutions?
Yes:
@@ -115,7 +158,7 @@ This is slow and expensive.
Prometheus remote read API isn't intended for querying foreign data aka `global query view`. See [this issue](https://github.com/prometheus/prometheus/issues/4456) for details.
So just query VictoriaMetrics directly via [Prometheus Querying API](https://prometheus.io/docs/prometheus/latest/querying/api/)
or via [Prometheus datasoruce in Grafana](http://docs.grafana.org/features/datasources/prometheus/).
or via [Prometheus datasource in Grafana](http://docs.grafana.org/features/datasources/prometheus/).
### Does VictoriaMetrics deduplicate data from Prometheus instances scraping the same targets (aka `HA pairs`)?
@@ -133,8 +176,8 @@ The deduplication for Prometheus HA pair may be easily implemented on top of Vic
### Where is the source code of VictoriaMetrics?
Source code for the following versions is available in the following places:
* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics)
* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster)
### Does VictoriaMetrics fit for data from IoT sensors and industrial sensors?
@@ -147,7 +190,11 @@ and scales horizontally to multiple nodes.
### Where can I ask questions about VictoriaMetrics?
See [VictoriaMetrics-users group](https://groups.google.com/forum/#!forum/victorametrics-users).
Questions about VictoriaMetrics can be asked via the following channels:
- [Slack channel](http://slack.victoriametrics.com/)
- [Telegram channel](https://t.me/VictoriaMetrics_en)
- [Google group](https://groups.google.com/forum/#!forum/victorametrics-users)
### Where can I file bugs and feature requests regarding VictoriaMetrics?

View File

@@ -3,7 +3,7 @@
* [Quick start](Quick-Start)
* [`WITH` templates playground](https://play.victoriametrics.com/promql/expand-with-exprs)
* [Grafana playground](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo)
* [MetricsQL](ExtendedPromQL)
* [MetricsQL](MetricsQL)
* [Single-node version](Single-server-VictoriaMetrics)
* [FAQ](FAQ)
* [Cluster version](Cluster-VictoriaMetrics)
@@ -11,3 +11,4 @@
* [Case Studies](CaseStudies)
* [vmbackup](vmbackup)
* [vmrestore](vmrestore)
* [vmagent](vmagent)

111
docs/MetricsQL.md Normal file
View File

@@ -0,0 +1,111 @@
# MetricsQL
VictoriaMetrics implements MetricsQL - query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
It is backwards compatible with PromQL, so Grafana dashboards backed by Prometheus datasource should work the same after switching from Prometheus to VictoriaMetrics.
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql) can be used for parsing MetricsQL in external apps.
The following functionality is implemented differently in MetricsQL comparing to PromQL in order to improve user experience:
* MetricsQL takes into account the previous point before the window in square brackets for range functions such as `rate` and `increase`.
It also doesn't extrapolate range function results. This addresses [this issue from Prometheus](https://github.com/prometheus/prometheus/issues/3746).
* MetricsQL returns the expected non-empty responses for requests with `step` values smaller than scrape interval. This addresses [this issue from Grafana](https://github.com/grafana/grafana/issues/11451).
* MetricsQL treats `scalar` type the same as `instant vector` without labels, since subtle difference between these types usually confuses users.
See [the corresponding Prometheus docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#expression-language-data-types) for details.
Other PromQL functionality should work the same in MetricsQL. [File an issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues)
if you notice discrepancies between PromQL and MetricsQL results other than mentioned above.
MetricsQL provides additional functionality mentioned below, which is aimed towards solving practical cases.
Feel free [filing a feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you think MetricsQL misses certain useful functionality.
*Note that the functionality mentioned below doesn't work in PromQL, so it is impossible switching back to Prometheus after you start using it.*
This functionality can be tried at [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).
- [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it.
- Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char.
- `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana.
For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps.
- `offset` may be put anywere in the query. For instance, `sum(foo) offset 24h`.
- `offset` may be negative. For example, `q offset -1h`.
- `default` binary operator. `q1 default q2` substitutes `NaN` values from `q1` with the corresponding values from `q2`.
- `histogram_quantile` accepts optional third arg - `boundsLabel`. In this case it returns `lower` and `upper` bounds for the estimated percentile. See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
- `if` binary operator. `q1 if q2` removes values from `q1` for `NaN` values from `q2`.
- `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for non-`NaN` values from `q2`.
- Trailing commas on all the lists are allowed - label filters, function args and with expressions. For instance, the following queries are valid: `m{foo="bar",}`, `f(a, b,)`, `WITH (x=y,) x`. This simplifies maintenance of multi-line queries.
- String literals may be concatenated. This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`.
- Range duration in functions such as [rate](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate()) may be omitted. VictoriaMetrics automatically selects range duration depending on the current step used for building the graph. For instance, the following query is valid in VictoriaMetrics: `rate(node_network_receive_bytes_total)`.
- [Range duration](https://prometheus.io/docs/prometheus/latest/querying/basics/#range-vector-selectors) and [offset](https://prometheus.io/docs/prometheus/latest/querying/basics/#offset-modifier) may be fractional. For instance, `rate(node_network_receive_bytes_total[1.5m] offset 0.5d)`.
- Comments starting with `#` and ending with newline. For instance, `up # this is a comment for 'up' metric`.
- Rollup functions - `rollup(m[d])`, `rollup_rate(m[d])`, `rollup_deriv(m[d])`, `rollup_increase(m[d])`, `rollup_delta(m[d])` - return `min`, `max` and `avg`
values for all the `m` data points over `d` duration.
- `rollup_candlestick(m[d])` - returns `open`, `close`, `low` and `high` values (OHLC) for all the `m` data points over `d` duration. This function is useful for financial applications.
- `union(q1, ... qN)` function for building multiple graphs for `q1`, ... `qN` subqueries with a single query. The `union` function name may be skipped -
the following queries are equivalent: `union(q1, q2)` and `(q1, q2)`.
- `ru(freeResources, maxResources)` function for returning resource utilization percentage in the range `0% - 100%`. For instance, `ru(node_memory_MemFree_bytes, node_memory_MemTotal_bytes)` returns memory utilization over [node_exporter](https://github.com/prometheus/node_exporter) metrics.
- `ttf(slowlyChangingFreeResources)` function for returning the time in seconds when the given `slowlyChangingFreeResources` expression reaches zero. For instance, `ttf(node_filesystem_avail_byte)` returns the time to storage space exhaustion. This function may be useful for capacity planning.
- Functions for label manipulation:
- `alias(q, name)` for setting metric name across all the time series `q`.
- `label_set(q, label1, value1, ... labelN, valueN)` for setting the given values for the given labels on `q`.
- `label_del(q, label1, ... labelN)` for deleting the given labels from `q`.
- `label_keep(q, label1, ... labelN)` for deleting all the labels except the given labels from `q`.
- `label_copy(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for copying label values from `src_*` to `dst_*`.
- `label_move(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for moving label values from `src_*` to `dst_*`.
- `label_transform(q, label, regexp, replacement)` for replacing all the `regexp` occurences with `replacement` in the `label` values from `q`.
- `label_value(q, label)` - returns numeric values for the given `label` from `q`.
- `label_match(q, label, regexp)` and `label_mismatch(q, label, regexp)` for filtering time series with labels matching (or not matching) the given regexps.
- `sort_by_label(q, label)` and `sort_by_label_desc(q, label)` for sorting time series by the given `label`.
- `step()` function for returning the step in seconds used in the query.
- `start()` and `end()` functions for returning the start and end timestamps of the `[start ... end]` range used in the query.
- `integrate(m[d])` for returning integral over the given duration `d` for the given metric `m`.
- `ideriv(m)` - for calculating `instant` derivative for `m`.
- `deriv_fast(m[d])` - for calculating `fast` derivative for `m` based on the first and the last points from duration `d`.
- `running_` functions - `running_sum`, `running_min`, `running_max`, `running_avg` - for calculating [running values](https://en.wikipedia.org/wiki/Running_total) on the selected time range.
- `range_` functions - `range_sum`, `range_min`, `range_max`, `range_avg`, `range_first`, `range_last`, `range_median`, `range_quantile` - for calculating global value over the selected time range.
- `smooth_exponential(q, sf)` - smooths `q` using [exponential moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) with the given smooth factor `sf`.
- `remove_resets(q)` - removes counter resets from `q`.
- `lag(q[d])` - returns lag between the current timestamp and the timestamp from the previous data point in `q` over `d`.
- `lifetime(q[d])` - returns lifetime of `q` over `d` in seconds. It is expected that `d` exceeds the lifetime of `q`.
- `scrape_interval(q[d])` - returns the average interval in seconds between data points of `q` over `d` aka `scrape interval`.
- Trigonometric functions - `sin(q)`, `cos(q)`, `asin(q)`, `acos(q)` and `pi()`.
- `range_over_time(m[d])` - returns value range for `m` over `d` time window, i.e. `max_over_time(m[d])-min_over_time(m[d])`.
- `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`.
- `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`.
- `limitk(k, q)` - limits the number of time series returned from `q` to `k`.
- `keep_last_value(q)` - fills missing data (gaps) in `q` with the previous non-empty value.
- `keep_next_value(q)` - fills missing data (gaps) in `q` with the next non-empty value.
- `distinct_over_time(m[d])` - returns distinct number of values for `m` data points over `d` duration.
- `distinct(q)` - returns a time series with the number of unique values for each timestamp in `q`.
- `sum2_over_time(m[d])` - returns sum of squares for all the `m` values over `d` duration.
- `sum2(q)` - returns a time series with sum of square values for each timestamp in `q`.
- `geomean_over_time(m[d])` - returns [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for all the `m` value over `d` duration.
- `geomean(q)` - returns a time series with [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for each timestamp in `q`.
- `rand()`, `rand_normal()` and `rand_exponential()` functions - for generating pseudo-random series with even, normal and exponential distribution.
- `increases_over_time(m[d])` and `decreases_over_time(m[d])` - returns the number of `m` increases or decreases over the given duration `d`.
- `prometheus_buckets(q)` - converts [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) buckets to Prometheus buckets with `le` labels.
- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details.
- `histogram_over_time(m[d])` - calculates [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) for `m` over `d`.
For example, the following query calculates median temperature by country over the last 24 hours:
`histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmbucket, country))`.
- `histogram_share(le, buckets)` - returns share (in the range 0..1) for `buckets`. Useful for calculating SLI and SLO.
For instance, the following query returns the share of requests which are performed under 1.5 seconds: `histogram_share(1.5, sum(request_duration_seconds_bucket) by (le))`.
- `topk_*` and `bottomk_*` aggregate functions, which return up to K time series. Note that the standard `topk` function may return more than K time series -
see [this article](https://www.robustperception.io/graph-top-n-time-series-in-grafana) for details.
- `topk_min(k, q)` - returns top K time series with the max minimums on the given time range
- `topk_max(k, q)` - returns top K time series with the max maximums on the given time range
- `topk_avg(k, q)` - returns top K time series with the max averages on the given time range
- `topk_median(k, q)` - returns top K time series with the max medians on the given time range
- `bottomk_min(k, q)` - returns bottom K time series with the min minimums on the given time range
- `bottomk_max(k, q)` - returns bottom K time series with the min maximums on the given time range
- `bottomk_avg(k, q)` - returns bottom K time series with the min averages on the given time range
- `bottomk_median(k, q)` - returns bottom K time series with the min medians on the given time range
- `share_le_over_time(m[d], le)` - returns share (in the range 0..1) of values in `m` over `d`, which are smaller or equal to `le`. Useful for calculating SLI and SLO.
Example: `share_le_over_time(memory_usage_bytes[24h], 100*1024*1024)` returns the share of time series values for the last 24 hours when memory usage was below or equal to 100MB.
- `share_gt_over_time(m[d], gt)` - returns share (in the range 0..1) of values in `m` over `d`, which are bigger than `gt`. Useful for calculating SLI and SLO.
Example: `share_gt_over_time(up[24h], 0)` - returns service availability for the last 24 hours.
- `tmin_over_time(m[d])` - returns timestamp for the minimum value for `m` over `d` time range.
- `tmax_over_time(m[d])` - returns timestamp for the maximum value for `m` over `d` time range.
- `aggr_over_time(("aggr_func1", "aggr_func2", ...), m[d])` - simultaneously calculates all the listed `aggr_func*` for `m` over `d` time range.
`aggr_func*` can contain any functions that accept range vector. For instance, `aggr_over_time(("min_over_time", "max_over_time", "rate"), m[d])`
would calculate `min_over_time`, `max_over_time` and `rate` for `m[d]`.
- `hoeffding_bound_upper(phi, m[d])` and `hoeffding_bound_lower(phi, m[d])` - return upper and lower [Hoeffding bounds](https://en.wikipedia.org/wiki/Hoeffding%27s_inequality)
for the given `phi` in the range `[0..1]`.

View File

@@ -7,18 +7,18 @@ in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just downl
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
## Case studies and talks
## Case studies
* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
## Prominent features
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) query language, which is inspired by PromQL.
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
@@ -28,9 +28,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
may be crammed into limited storage comparing to TimescaleDB.
* Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
and [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* Easy operation:
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
* All the configuration is done via explicit command-line flags with reasonable defaults.
@@ -39,9 +41,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
to S3 or GCS with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md) / [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md).
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
* Supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
* [Metrics from Prometheus exporters](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
such as [node_exporter](https://github.com/prometheus/node_exporter). See [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for details.
* [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) over HTTP, TCP and UDP.
* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
if `-graphiteListenAddr` is set.
* [OpenTSDB put message](#sending-data-via-telnet-put-protocol) if `-opentsdbListenAddr` is set.
@@ -50,60 +54,61 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
## Operation
### Table of contents
- [How to start VictoriaMetrics](#how-to-start-victoriametrics)
- [Prometheus setup](#prometheus-setup)
- [Grafana setup](#grafana-setup)
- [How to upgrade VictoriaMetrics?](#how-to-upgrade-victoriametrics)
- [How to apply new config to VictoriaMetrics?](#how-to-apply-new-config-to-victoriametrics)
- [How to send data from InfluxDB-compatible agents such as Telegraf?](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
- [How to send data from Graphite-compatible agents such as StatsD?](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
- [Querying Graphite data](#querying-graphite-data)
- [How to send data from OpenTSDB-compatible agents?](#how-to-send-data-from-opentsdb-compatible-agents)
- [Prometheus querying API usage](#prometheus-querying-api-usage)
- [How to build from sources](#how-to-build-from-sources)
- [Development build](#development-build)
- [Production build](#production-build)
- [ARM build](#arm-build)
- [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0)
- [Building docker images](#building-docker-images)
- [Start with docker-compose](#start-with-docker-compose)
- [Setting up service](#setting-up-service)
- [Third-party contributions](#third-party-contributions)
- [How to work with snapshots?](#how-to-work-with-snapshots)
- [How to delete time series?](#how-to-delete-time-series)
- [How to export time series?](#how-to-export-time-series)
- [How to import time series data?](#how-to-import-time-series-data)
- [Federation](#federation)
- [Capacity planning](#capacity-planning)
- [High availability](#high-availability)
- [Multiple retentions](#multiple-retentions)
- [Downsampling](#downsampling)
- [Multi-tenancy](#multi-tenancy)
- [Scalability and cluster version](#scalability-and-cluster-version)
- [Alerting](#alerting)
- [Security](#security)
- [Tuning](#tuning)
- [Monitoring](#monitoring)
- [Troubleshooting](#troubleshooting)
- [Backfilling](#backfilling)
- [Profiling](#profiling)
- [Integrations](#integrations)
- [Roadmap](#roadmap)
- [Contacts](#contacts)
- [Community and contributions](#community-and-contributions)
- [Reporting bugs](#reporting-bugs)
- [Victoria Metrics Logo](#victoria-metrics-logo)
- [Logo Usage Guidelines](#logo-usage-guidelines)
- [Font used:](#font-used)
- [Color Palette:](#color-palette)
- [We kindly ask:](#we-kindly-ask)
* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
* [Environment variables](#environment-variables)
* [Prometheus setup](#prometheus-setup)
* [Grafana setup](#grafana-setup)
* [How to upgrade VictoriaMetrics](#how-to-upgrade-victoriametrics)
* [How to apply new config to VictoriaMetrics](#how-to-apply-new-config-to-victoriametrics)
* [How to scrape Prometheus exporters such as node_exporter](#how-to-scrape-prometheus-exporters-such-as-node-exporter)
* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
* [Querying Graphite data](#querying-graphite-data)
* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
* [Prometheus querying API usage](#prometheus-querying-api-usage)
* [How to build from sources](#how-to-build-from-sources)
* [Development build](#development-build)
* [Production build](#production-build)
* [ARM build](#arm-build)
* [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0)
* [Building docker images](#building-docker-images)
* [Start with docker-compose](#start-with-docker-compose)
* [Setting up service](#setting-up-service)
* [How to work with snapshots](#how-to-work-with-snapshots)
* [How to delete time series](#how-to-delete-time-series)
* [How to export time series](#how-to-export-time-series)
* [How to import time series data](#how-to-import-time-series-data)
* [Federation](#federation)
* [Capacity planning](#capacity-planning)
* [High availability](#high-availability)
* [Deduplication](#deduplication)
* [Retention](#retention)
* [Multiple retentions](#multiple-retentions)
* [Downsampling](#downsampling)
* [Multi-tenancy](#multi-tenancy)
* [Scalability and cluster version](#scalability-and-cluster-version)
* [Alerting](#alerting)
* [Security](#security)
* [Tuning](#tuning)
* [Monitoring](#monitoring)
* [Troubleshooting](#troubleshooting)
* [Backfilling](#backfilling)
* [Profiling](#profiling)
* [Integrations](#integrations)
* [Roadmap](#roadmap)
* [Contacts](#contacts)
* [Community and contributions](#community-and-contributions)
* [Third-party contributions](#third-party-contributions)
* [Reporting bugs](#reporting-bugs)
* [Victoria Metrics Logo](#victoria-metrics-logo)
* [Logo Usage Guidelines](#logo-usage-guidelines)
* [Font used](#font-used)
* [Color Palette](#color-palette)
* [We kindly ask](#we-kindly-ask)
### How to start VictoriaMetrics
@@ -115,19 +120,23 @@ The following command-line flags are used the most:
* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in current working directory.
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default, it is disabled.
* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data over telnet protocol. By default, it is disabled.
* `-opentsdbHTTPListenAddr` - TCP address to listen to for HTTP OpenTSDB data over `/api/put`. By default, it is disabled.
Pass `-help` to see all the available flags with description and default values.
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
#### Environment variables
Each flag values can be set thru environment variables by following these rules:
* The `-envflag.enable` flag must be set
* Each `.` in flag names must be substituted by `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`)
* For repeating flags, an alternative syntax can be used by joining the different values into one using `,` as separator (for example `-storageNode <nodeA> -storageNode <nodeB>` will translate to `storageNode=<nodeA>,<nodeB>`)
### Prometheus setup
Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
in order to send data to VictoriaMetrics. Add the following lines
Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
in order to send data to VictoriaMetrics. Add the following lines
to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
```yml
@@ -138,7 +147,7 @@ remote_write:
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then apply the new config via the following command:
```
```bash
kill -HUP `pidof prometheus`
```
@@ -161,7 +170,8 @@ across Prometheus instances, so those time series may be filtered and grouped by
For highly loaded Prometheus instances (400k+ samples per second)
the following tuning may be applied:
```
```yaml
remote_write:
- url: http://<victoriametrics-addr>:8428/api/v1/write
queue_config:
@@ -172,29 +182,27 @@ remote_write:
Using remote write increases memory usage for Prometheus up to ~25%
and depends on the shape of data. If you are experiencing issues with
too high memory consumption try to lower `max_samples_per_send`
too high memory consumption try to lower `max_samples_per_send`
and `capacity` params (keep in mind that these two params are tightly connected).
Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
since the previous versions may have issues with `remote_write`.
### Grafana setup
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
```
```url
http://<victoriametrics-addr>:8428
```
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/).
VictoriaMetrics supports native PromQL and [extends it with useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
VictoriaMetrics supports native PromQL and [extends it with useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL).
### How to upgrade VictoriaMetrics?
### How to upgrade VictoriaMetrics
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
say otherwise. It is recommended performing regular upgrades to the latest version,
@@ -209,8 +217,7 @@ Follow the following steps during the upgrade:
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to apply new config to VictoriaMetrics?
### How to apply new config to VictoriaMetrics
VictoriaMetrics must be restarted for applying new config:
@@ -221,20 +228,35 @@ VictoriaMetrics must be restarted for applying new config:
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to scrape Prometheus exporters such as [node-exporter](https://github.com/prometheus/node_exporter)
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?
VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping targets configured in `prometheus.yml` config file according to [the specification](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file).
Just set `-promscrape.config` command-line flag to the path to `prometheus.yml` config - and VictoriaMetrics should start scraping the configured targets.
Currently the following [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) types are supported:
* [static_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config)
* [file_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config)
In the future other `*_sd_config` types will be supported.
See also [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md), which can be used as drop-in replacement for Prometheus.
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)
Just use `http://<victoriametric-addr>:8428` url instead of InfluxDB url in agents' configs.
For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
```
```toml
[[outputs.influxdb]]
urls = ["http://<victoriametrics-addr>:8428"]
```
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
Another option is to enable TCP and UDP receiver for Influx line protocol via `-influxListenAddr` command-line flag.
VictoriaMetrics maps Influx data using the following rules:
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
unless `db` tag exists in the Influx line.
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
@@ -245,13 +267,13 @@ VictoriaMetrics maps Influx data using the following rules:
For example, the following Influx line:
```
```raw
foo,tag1=value1,tag2=value2 field1=12,field2=40
```
is converted into the following Prometheus data points:
```
```raw
foo_field1{tag1="value1", tag2="value2"} 12
foo_field2{tag1="value1", tag2="value2"} 40
```
@@ -259,20 +281,20 @@ foo_field2{tag1="value1", tag2="value2"} 40
Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
to local VictoriaMetrics using `curl`:
```
```bash
curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'
```
An arbitrary number of lines delimited by '\n' may be sent in a single request.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__=~"measurement_.*"}'
```
The `/api/v1/export` endpoint should return the following response:
```
```jsonl
{"metric":{"__name__":"measurement_field1","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560272508147]}
{"metric":{"__name__":"measurement_field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]}
```
@@ -280,23 +302,21 @@ The `/api/v1/export` endpoint should return the following response:
Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
while VictoriaMetrics stores them with *milliseconds* precision.
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)
1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`:
```
```bash
/path/to/victoria-metrics-prod -graphiteListenAddr=:2003
```
2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost`
to the VictoriaMetrics host in `StatsD` configs.
Example for writing data with Graphite plaintext protocol to local VictoriaMetrics using `nc`:
```
```bash
echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003
```
@@ -304,26 +324,23 @@ VictoriaMetrics sets the current time if the timestamp is omitted.
An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277406000]}
```
### Querying Graphite data
Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read either via
[Prometheus querying API](#prometheus-querying-api-usage)
or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
### How to send data from OpenTSDB-compatible agents?
### How to send data from OpenTSDB-compatible agents
VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data.
@@ -333,39 +350,37 @@ and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.ht
1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
the following command enables OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
```
```bash
/path/to/victoria-metrics-prod -opentsdbListenAddr=:4242
```
2) Send data to the given address from OpenTSDB-compatible agents.
Example for writing data with OpenTSDB protocol to local VictoriaMetrics using `nc`:
```
```bash
echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242
```
An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277292000]}
```
#### Sending OpenTSDB data via HTTP `/api/put` requests
1) Enable HTTP server for OpenTSDB `/api/put` requests by setting `-opentsdbHTTPListenAddr` command line flag. For instance,
the following command enables OpenTSDB HTTP server on port `4242`:
```
```bash
/path/to/victoria-metrics-prod -opentsdbHTTPListenAddr=:4242
```
@@ -373,31 +388,30 @@ the following command enables OpenTSDB HTTP server on port `4242`:
Example for writing a single data point:
```
```bash
curl -H 'Content-Type: application/json' -d '{"metric":"x.y.z","value":45.34,"tags":{"t1":"v1","t2":"v2"}}' http://localhost:4242/api/put
```
Example for writing multiple data points in a single request:
```
```bash
curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put
```
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
```bash
curl -G 'http://localhost:8428/api/v1/export' -d 'match[]=x.y.z' -d 'match[]=foo' -d 'match[]=bar'
```
The `/api/v1/export` endpoint should return the following response:
```
```bash
{"metric":{"__name__":"foo"},"values":[45.34],"timestamps":[1566464846000]}
{"metric":{"__name__":"bar"},"values":[43],"timestamps":[1566464846000]}
{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]}
```
### Prometheus querying API usage
VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
@@ -422,7 +436,6 @@ Additionally VictoriaMetrics provides the following handlers:
so it can be slow if the database contains tens of millions of time series.
* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
### How to build from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -430,7 +443,6 @@ We recommend using either [binary releases](https://github.com/VictoriaMetrics/V
from sources. Building from sources is reasonable when developing additional features specific
to your needs.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
@@ -475,32 +487,24 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.
### Start with docker-compose
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml)
helps to spin up VictoriaMetrics, Prometheus and Grafana with one command.
More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#folder-contains-basic-images-and-tools-for-building-and-running-victoria-metrics-in-docker).
### Setting up service
Read [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) on how to set up VictoriaMetrics as a service in your OS.
### Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
### How to work with snapshots?
### How to work with snapshots
VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
for all the data stored under `-storageDataPath` directory.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/create` in order to create an instant snapshot.
The page will return the following JSON response:
```
```json
{"status":"ok","snapshot":"<snapshot-name>"}
```
@@ -516,13 +520,13 @@ to delete `<snapshot-name>` snapshot.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete_all` in order to delete all the snapshots.
Steps for restoring from a snapshot:
1. Stop VictoriaMetrics with `kill -INT`.
2. Restore snapshot contents from backup with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md)
to the directory pointed by `-storageDataPath`.
3. Start VictoriaMetrics.
### How to delete time series?
### How to delete time series
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
@@ -532,8 +536,21 @@ the deleted time series isn't freed instantly - it is freed during subsequent me
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics.
The delete API is intended mainly for the following cases:
### How to export time series?
* One-off deleting of accidentally written invalid (or undesired) time series.
* One-off deleting of user data due to [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
* Regular cleanups for unneded data. Just prevent writing unneeded data into VictoriaMetrics.
See [this article](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts) for details.
* Reducing disk space usage by deleting unneded time series. This doesn't work as expected, since the deleted
time series occupy disk space until the next merge operation, which can never occur.
It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
### How to export time series
Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
@@ -541,7 +558,7 @@ for metrics to export. Use `{__name__!=""}` selector for fetching all the time s
The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
Each JSON line would contain data for a single time series. An example output:
```
```jsonl
{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
```
@@ -552,7 +569,7 @@ unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) val
Pass `Accept-Encoding: gzip` HTTP header in the request to `/api/v1/export` in order to reduce network bandwidth during exporing big amounts
of time series data. This enables gzip compression for the exported data. Example for exporting gzipped data:
```
```bash
curl -H 'Accept-Encoding: gzip' http://localhost:8428/api/v1/export -d 'match[]={__name__!=""}' > data.jsonl.gz
```
@@ -560,8 +577,7 @@ The maximum duration for each request to `/api/v1/export` is limited by `-search
Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-time-series-data).
### How to import time series data?
### How to import time series data
Time series data can be imported via any supported ingestion protocol:
@@ -574,7 +590,7 @@ Time series data can be imported via any supported ingestion protocol:
The most efficient protocol for importing data into VictoriaMetrics is `/api/v1/import`. Example for importing data obtained via `/api/v1/export`:
```
```bash
# Export the data from <source-victoriametrics>:
curl http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl
@@ -584,7 +600,7 @@ curl -X POST http://destination-victoriametrics:8428/api/v1/import -T exported_d
Pass `Content-Encoding: gzip` HTTP request header to `/api/v1/import` for importing gzipped data:
```
```bash
# Export gzipped data from <source-victoriametrics>:
curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl.gz
@@ -595,7 +611,6 @@ curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428
Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
and importing them concurrently. Note that the original file must be split on newlines.
### Federation
VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
@@ -607,7 +622,6 @@ on the interval `[now - max_lookback ... now]` is scraped for each time series.
For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
with scrape intervals exceeding `5m`.
### Capacity planning
A rough estimation of the required resources for ingestion path:
@@ -615,7 +629,7 @@ A rough estimation of the required resources for ingestion path:
* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
Time series is considered active if new data points have been added to it recently or if it has been recently queried.
The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric
exproted on the `/metrics` page.
exported on the `/metrics` page.
VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag.
* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
@@ -635,7 +649,6 @@ A rough estimation of the required resources for ingestion path:
The actual ingress bandwidth usage depends on the average number of labels per ingested metric and the average size
of label values. The higher number of per-metric labels and longer label values mean the higher ingress bandwidth.
The required resources for query path:
* RAM size: depends on the number of time series to scan in each query and the `step`
@@ -647,7 +660,6 @@ The required resources for query path:
* Network usage: depends on the frequency and the type of incoming requests. Typical Grafana dashboards usually
require negligible network bandwidth.
### High availability
1) Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
@@ -666,7 +678,7 @@ remote_write:
3) Apply the updated config:
```
```bash
kill -HUP `pidof prometheus`
```
@@ -674,10 +686,29 @@ kill -HUP `pidof prometheus`
5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
6) Set up Prometheus datasource in Grafana that points to Promxy.
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
with the enabled de-duplication. See [this section](#deduplication) for details.
### Deduplication
VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would de-duplicate data points
on the same time series if they are located closer than 60s to each other.
The de-duplication reduces disk space usage if multiple identically configured Prometheus instances in HA pair
write data to the same VictoriaMetrics instance. Note that these Prometheus instances must have identical
`external_labels` section in their configs, so they write data to the same time series.
### Retention
Retention is configured with `-retentionPeriod` command-line flag. For instance, `-retentionPeriod=3` means
that the data will be stored for 3 months and then deleted.
Data is split in per-month subdirectories inside `<-storageDataPath>/data/small` and `<-storageDataPath>/data/big` folders.
Directories for months outside the configured retention are deleted on the first day of new month.
In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
### Multiple retentions
@@ -687,24 +718,22 @@ Just start multiple VictoriaMetrics instances with distinct values for the follo
* `-storageDataPath`, so the data for each retention period is saved in a separate directory
* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention
### Downsampling
There is no downsampling support at the moment, but:
- VictoriaMetrics is optimized for querying big amounts of raw data. See benchmark results for heavy queries
* VictoriaMetrics is optimized for querying big amounts of raw data. See benchmark results for heavy queries
in [this article](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
- VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
* VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
for details.
These properties reduce the need in downsampling. We plan to implement downsampling in the future.
These properties reduce the need of downsampling. We plan to implement downsampling in the future.
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.
### Multi-tenancy
Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead.
### Scalability and cluster version
Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU.
@@ -715,12 +744,13 @@ So try single-node VictoriaMetrics at first and then [switch to cluster version]
horizontally scalable long-term remote storage for really large Prometheus deployments.
[Contact us](mailto:info@victoriametrics.com) for paid support.
### Alerting
VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions must be performed either
on [Prometheus side](https://prometheus.io/docs/alerting/overview/) or on [Grafana side](https://grafana.com/docs/alerting/rules/).
VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).
### Security
@@ -736,28 +766,29 @@ Consider setting the following command-line flags:
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
### Tuning
* There is no need in VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
* There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
which are automatically adjusted for the available CPU and RAM resources.
* There is no need in Operating System tuning since VictoriaMetrics is optimized for default OS settings.
* There is no need for Operating System tuning since VictoriaMetrics is optimized for default OS settings.
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
so Prometheus instances could establish more connections to VictoriaMetrics.
* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
If you plan storing more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
If you plan to store more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
then the following options are recommended to pass to `mkfs.ext4`:
```
```bash
mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
```
### Monitoring
VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
There are officials Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176).
The most interesting metrics are:
@@ -773,11 +804,10 @@ The most interesting metrics are:
where `system_memory` is the amount of system memory and `-memory.allowedPercent` is the corresponding flag value.
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
### Troubleshooting
* It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
in tweaking these flag values arises.
of tweaking these flag values arises.
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
then it is likely you have too many active time series for the current amount of RAM.
@@ -791,31 +821,42 @@ The most interesting metrics are:
has at least 20% of free space comparing to disk size.
* If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
then just remove directoreis with broken parts. This will recover VictoriaMetrics at the cost
then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
for automatic recovering from such errors.
* If you see gaps on the graphs, try resetting the cache by sending request to `/internal/resetRollupResultCache`.
If this removes gaps on the graphs, then it is likely data with timestamps older than `-search.cacheTimestampOffset`
is ingested into VictoriaMetrics. Make sure that data sources have synchronized time with VictoriaMetrics.
### Backfilling
VictoriaMetrics accepts historical data in arbitrary order of time.
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
historical data with timestamps from the past, since the cache assumes that the data is written with
the current timestamps. Query cache can be enabled after the backfilling is complete.
An alternative solution is to query `/internal/resetRollupResultCache` url after backfilling is complete. This will reset
the query cache, which could contain incomplete data cached during the backfilling.
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
for data with timestamps close to the current time.
### Profiling
VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
- Memory profile. It can be collected with the following command:
```
* Memory profile. It can be collected with the following command:
```bash
curl -s http://<victoria-metrics-host>:8428/debug/pprof/heap > mem.pprof
```
- CPU profile. It can be collected with the following command:
```
* CPU profile. It can be collected with the following command:
```bash
curl -s http://<victoria-metrics-host>:8428/debug/pprof/profile > cpu.pprof
```
@@ -823,7 +864,6 @@ The command for collecting CPU profile waits for 30 seconds before returning.
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
## Integrations
* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`.
@@ -832,60 +872,61 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
See [this example](/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).
* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
## Roadmap
- [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
- [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
- [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
- [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
- [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment any item or add own one.
* [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
* [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38)
* [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36)
* [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119)
* [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)
The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add you own one.
## Contacts
Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics.com](mailto:info@victoriametrics.com).
## Community and contributions
Feel free asking any questions regarding VictoriaMetrics:
- [slack](http://slack.victoriametrics.com/)
- [telegram-en](https://t.me/VictoriaMetrics_en)
- [telegram-ru](https://t.me/VictoriaMetrics_ru1)
- [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
* [slack](http://slack.victoriametrics.com/)
* [reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [telegram-en](https://t.me/VictoriaMetrics_en)
* [telegram-ru](https://t.me/VictoriaMetrics_ru1)
* [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
If you like VictoriaMetrics and want to contribute, then we need the following:
- Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
- Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
- Updating documentation.
* Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
* Spreading a word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues.
* Updating documentation.
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
- Prefer simple code and architecture.
- Avoid complex abstractions.
- Avoid magic code and fancy algorithms.
- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
- Minimize the number of moving parts in the distributed system.
- Avoid automated decisions, which may hurt cluster availability, consistency or performance.
* Prefer simple code and architecture.
* Avoid complex abstractions.
* Avoid magic code and fancy algorithms.
* Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d).
* Minimize the number of moving parts in the distributed system.
* Avoid automated decisions, which may hurt cluster availability, consistency or performance.
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
## Third-party contributions
* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
* [Prometheus Oauth proxy](https://gitlab.com/optima_public/prometheus_oauth_proxy) - see [this article](https://medium.com/@richard.holly/powerful-saas-solution-for-detection-metrics-c67b9208d362) for details.
## Reporting bugs
Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
## Victoria Metrics Logo
[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version).
[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
Files included in each folder:
@@ -893,22 +934,21 @@ Files included in each folder:
* 2 PNG Preview files with transparent background
* 2 EPS Adobe Illustrator EPS10 files
### Logo Usage Guidelines
#### Font used:
#### Font used
* Lato Black
* Lato Regular
#### Color Palette:
#### Color Palette
* HEX [#110f0f](https://www.color-hex.com/color/110f0f)
* HEX [#ffffff](https://www.color-hex.com/color/ffffff)
### We kindly ask:
### We kindly ask
- Please don't use any other font instead of suggested.
- There should be sufficient clear space around the logo.
- Do not change spacing, alignment, or relative locations of the design elements.
- Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.
* Please don't use any other font instead of suggested.
* There should be sufficient clear space around the logo.
* Do not change spacing, alignment, or relative locations of the design elements.
* Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.

170
docs/vmagent.md Normal file
View File

@@ -0,0 +1,170 @@
## vmagent
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
<img alt="vmagent" src="vmagent.png">
### Motivation
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
Also, we found that users infrastructure is like snowflakes - never alike, and we decided to add more flexibility
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
### Features
* Can be used as drop-in replacement for Prometheus for scraping targets such as [node_exporter](https://github.com/prometheus/node_exporter).
See [Quick Start](#quick-start) for details.
* Can add, remove and modify labels via Prometheus relabeling. See [these docs](#relabeling) for details.
* Accepts data via all the ingestion protocols supported by VictoriaMetrics:
* Influx line protocol via `http://<vmagent>:8429/write`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf).
* JSON lines import protocol via `http://<vmagent>:8429/api/v1/import`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data).
* Graphite plaintext protocol if `-graphiteListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-graphite-compatible-agents-such-as-statsd).
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-opentsdb-compatible-agents).
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
* Can replicate collected metrics simultaneously to multiple remote storage systems.
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
to remote storage is recovered.
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth comparing to Prometheus.
### Quick Start
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. Multiple `-remoteWrite.url` args can be set in parallel
in order to replicate data concurrently to multiple remote storage systems.
Example command line:
```
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
If you need collecting only Influx data, then the following command line would be enough:
```
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
Then send Influx data to `http://vmagent-host:8429/write`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/).
Pass `-help` to `vmagent` in order to see the full list of supported command-line flags with their descriptions.
### How to collect metrics in Prometheus format?
Pass the path to `prometheus.yml` to `-promscrape.config` command-line flag. `vmagent` takes into account the following
sections from [Prometheus config file](https://prometheus.io/docs/prometheus/latest/configuration/configuration/):
* `global`
* `scrape_configs`
All the other sections are ignored, including [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section.
Use `-remoteWrite.*` command-line flags instead for configuring remote write settings:
* `-remoteWrite.url` for pointing to remote storage. Data to remote storage can be sent either via HTTP or HTTPS. See `-remoteWrite.tls*` flags for details.
* `-remoteWrite.label` for adding labels to metrics before sending them to remote storage.
* `-remoteWrite.relabelConfig` for applying relabeling to metrics before sending them to remote storage.
The following scrape types in [scrape_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) section are supported:
* `static_configs` - for scraping statically defined targets. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config) for details.
* `file_sd_configs` - for scraping targets defined in external files aka file-based service discover.
See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
The following service discovery mechanisms will be added to `vmagent` soon:
* [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
### Adding labels to metrics
Labels can be added to metrics via the following mechanisms:
* Via `global -> external_labels` section in `-promscrape.config` file. These labels are added only to metrics scraped from targets configured in `-promscrape.config` file.
* Via `-remoteWrite.label` command-line flag. These labels are added to all the collected metrics before sending them to `-remoteWrite.url`.
### Relabeling
`vmagent` supports [Prometheus relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
Additionally it provides the following extra actions:
* `replace_all`: replaces all the occurences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
* `labelmap_all`: replaces all the occurences of `regex` in all the label names with the `replacement`.
The relabeling can be defined in the following places:
* At `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to targets when parsing the file during `vmagent` startup
or during config reload after sending `SIGHUP` signal to `vmagent` via `kill -HUP`.
* At `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to metrics after each scrape for the configured targets.
* At `-remoteWrite.relabelConfig` file. This relabeling is aplied to all the collected metrics before sending them to `-remoteWrite.url`.
Read more about relabeling in the following articles:
* [Life of a label](https://www.robustperception.io/life-of-a-label)
* [Discarding targets and timeseries with relabeling](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts)
* [Dropping labels at scrape time](https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus)
* [Extracting labels from legacy metric names](https://www.robustperception.io/extracting-labels-from-legacy-metric-names)
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
### Monitoring
`vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.
`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
### Troubleshooting
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping big number of targets,
since `vmagent` establishes at least a single TCP connection per each target.
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
The directory can grow big when remote storage is unvailable during extended periods of time. If you don't want
sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
### How to build from sources
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in `vmutils-*` archives there.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make vmagent` from the root folder of the repository.
It builds `vmagent` binary and puts it into the `bin` folder.
#### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmagent-prod` from the root folder of the repository.
It builds `vmagent-prod` binary and puts it into the `bin` folder.
#### Building docker images
Run `make package-vmagent`. It builds `victoriametrics/vmagent:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmagent`.

View File

@@ -1,2 +1,3 @@
fmt.Fprintf
fmt.Fprint
(net/http.ResponseWriter).Write

32
go.mod
View File

@@ -1,31 +1,25 @@
module github.com/VictoriaMetrics/VictoriaMetrics
require (
cloud.google.com/go v0.50.0 // indirect
cloud.google.com/go/storage v1.4.0
github.com/VictoriaMetrics/fastcache v1.5.4
github.com/VictoriaMetrics/metrics v1.9.3
github.com/aws/aws-sdk-go v1.26.8
cloud.google.com/go/storage v1.6.0
github.com/VictoriaMetrics/fastcache v1.5.7
github.com/VictoriaMetrics/metrics v1.11.0
github.com/aws/aws-sdk-go v1.29.10
github.com/cespare/xxhash/v2 v2.1.1
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect
github.com/golang/snappy v0.0.1
github.com/jstemmer/go-junit-report v0.9.1 // indirect
github.com/klauspost/compress v1.9.4
github.com/valyala/fastjson v1.4.1
github.com/klauspost/compress v1.10.1
github.com/valyala/fasthttp v1.9.0
github.com/valyala/fastjson v1.5.0
github.com/valyala/fastrand v1.0.0
github.com/valyala/gozstd v1.6.4
github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.4.1
go.opencensus.io v0.22.2 // indirect
golang.org/x/exp v0.0.0-20191224044220-1fea468a75e9 // indirect
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 // indirect
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6 // indirect
golang.org/x/sys v0.0.0-20191224085550-c709ea063b76
golang.org/x/tools v0.0.0-20191224055732-dd894d0a8a40 // indirect
google.golang.org/api v0.15.0
google.golang.org/appengine v1.6.5 // indirect
google.golang.org/genproto v0.0.0-20191223191004-3caeed10a8bf // indirect
google.golang.org/grpc v1.26.0 // indirect
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b // indirect
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae
golang.org/x/tools v0.0.0-20200226180945-26f6a1b6802d // indirect
google.golang.org/api v0.19.0
google.golang.org/genproto v0.0.0-20200225123651-fc8f55426688 // indirect
gopkg.in/yaml.v2 v2.2.8
)
go 1.12

161
go.sum
View File

@@ -5,32 +5,47 @@ cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6A
cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
cloud.google.com/go v0.50.0 h1:0E3eE8MX426vUOs7aHfI7aN1BrIzzzf4ccKCSfSjGmc=
cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
cloud.google.com/go/bigquery v1.0.1 h1:hL+ycaJpVE9M7nLoiXb/Pn10ENE2u+oddxbD8uu0ZVU=
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
cloud.google.com/go v0.53.0 h1:MZQCQQaRwOrAcuKjiHWHrgKykt4fZyuwF2dtiG3fGW8=
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
cloud.google.com/go/bigquery v1.4.0 h1:xE3CPsOgttP4ACBePh79zTKALtXwn/Edhcr16R5hMWU=
cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
cloud.google.com/go/datastore v1.0.0 h1:Kt+gOPPp2LEPWp8CSfxhsM8ik9CcyE/gYu+0r+RnZvM=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/pubsub v1.0.1 h1:W9tAK3E57P75u0XLLR82LZyw8VpAnhmyTOxW9qzmyj8=
cloud.google.com/go/datastore v1.1.0 h1:/May9ojXjRkPBNVrq+oWLqmWCkr4OU5uRY29bu0mRyQ=
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
cloud.google.com/go/pubsub v1.1.0 h1:9/vpR43S4aJaROxqQHQ3nH9lfyKKV0dC3vOmnw8ebQQ=
cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
cloud.google.com/go/pubsub v1.2.0 h1:Lpy6hKgdcl7a3WGSfJIFmxmcdjSpP6OmBEfcOv1Y680=
cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
cloud.google.com/go/storage v1.4.0 h1:KDdqY5VTXBTqpSbctVTt0mVvfanP6JZzNzLE0qNY100=
cloud.google.com/go/storage v1.4.0/go.mod h1:ZusYJWlOshgSBGbt6K3GnB3MT3H1xs2id9+TCl4fDBA=
cloud.google.com/go/storage v1.5.0 h1:RPUcBvDeYgQFMfQu1eBMq6piD1SXmLH+vK3qjewZPus=
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
cloud.google.com/go/storage v1.6.0 h1:UDpwYIwla4jHGzZJaEJYx1tOejbgSoNqsAfHAUYe2r8=
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/VictoriaMetrics/fastcache v1.5.4 h1:0BaXbRH01RycJk79OOBwMCXlNryko9z4yEf6RqbP+Xo=
github.com/VictoriaMetrics/fastcache v1.5.4/go.mod h1:ptDBkNMQI4RtmVo8VS/XwRY6RoTu1dAWCbrk+6WsEM8=
github.com/VictoriaMetrics/metrics v1.9.3 h1:+1kZnOIb8RY825Nb9q9yMrPcOYuPE2GrZWxUh59XnHI=
github.com/VictoriaMetrics/metrics v1.9.3/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
github.com/VictoriaMetrics/fastcache v1.5.7 h1:4y6y0G8PRzszQUYIQHHssv/jgPHAb5qQuuDNdCbyAgw=
github.com/VictoriaMetrics/fastcache v1.5.7/go.mod h1:ptDBkNMQI4RtmVo8VS/XwRY6RoTu1dAWCbrk+6WsEM8=
github.com/VictoriaMetrics/metrics v1.11.0 h1:sfRmbgk7hGrxNXrziwyTmU8FZFLFrPNC7g2kZs0xMTQ=
github.com/VictoriaMetrics/metrics v1.11.0/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
github.com/aws/aws-sdk-go v1.26.8 h1:W+MPuCFLSO/itZkZ5GFOui0YC1j3lZ507/m5DFPtzE4=
github.com/aws/aws-sdk-go v1.26.8/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.29.10 h1:QJOQq1xNmdrY5mXUmC8CHXzZPve8134Bx/Ux0o6s38s=
github.com/aws/aws-sdk-go v1.29.10/go.mod h1:1KvfttTE3SPKMpo8g2c6jL3ZKfXtFvKscTgahTma5Xg=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -39,35 +54,48 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 h1:uHTyIjqVhYRhLbJ8nIiOJHkEZZ+5YoOsAbD3sk82NiE=
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
@@ -76,29 +104,33 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.9.4 h1:xhvAeUPQ2drNUhKtrGdTGNvV9nNafHMUkRyLkzxJoB4=
github.com/klauspost/compress v1.9.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ=
github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s=
github.com/valyala/fastjson v1.4.1 h1:hrltpHpIpkaxll8QltMU8c3QZ5+qIiCL8yKqPFJI/yE=
github.com/valyala/fastjson v1.4.1/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
github.com/valyala/fasthttp v1.9.0 h1:hNpmUdy/+ZXYpGy0OBfm7K0UQTzb73W0T0U4iJIVrMw=
github.com/valyala/fasthttp v1.9.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
github.com/valyala/fastjson v1.5.0 h1:DGrb4wEYso2HdGLyLmNoyNCQnCWfjd8yhghPv5/5YQg=
github.com/valyala/fastjson v1.5.0/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/gozstd v1.6.4 h1:nFLddjEf90SFl5cVWyElSHozQDsbvLljPK703/skBS0=
@@ -112,6 +144,8 @@ go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2 h1:75k/FF0Q2YM8QYo07VPddOLBslDt1MZOdEslOHvmzAs=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.3 h1:8sGtKOrtQqkN1bp2AtX+misvLIlOmsEsNd+9NIcPEm8=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@@ -120,12 +154,14 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136 h1:A1gGSx58LAGVHUUsOf7IiR0u8Xb6W51gRwfDBhkdcaw=
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
golang.org/x/exp v0.0.0-20191129062945-2f5052295587 h1:5Uz0rkjCFu9BC9gCRN7EkwVvhNyQgGWb8KNJrPwBoHY=
golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20191224044220-1fea468a75e9 h1:HLuLY2KniBsHW28uXd1i2UZKjifeJUy//P/wTK6AJwI=
golang.org/x/exp v0.0.0-20191224044220-1fea468a75e9/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd h1:zkO/Lhoka23X63N9OSzpSeROEUQ5ODw47tM3YWjygbs=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6 h1:QE6XYQK6naiK1EPAe1g/ILLxN5RBoH5xkJk3CqlMI/Y=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -134,15 +170,20 @@ golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTk
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f h1:J5lckAjkw6qYlOZNj90mLYNTEKDvWeuc1yieZ8qUzUE=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
golang.org/x/lint v0.0.0-20200130185559-910be7a94367 h1:0IiAsCRByjO2QjX7ZPkw5oU9x+n1YqRL802rjC0c3Aw=
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee h1:WG0RUwxtNT4qqaXX3DPA8zHFNm/D9xaBpxzHt1WcA/E=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -155,19 +196,24 @@ golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 h1:efeOvDhwQ29Dj3SdAV/MJf8oukgn+8D8WgaCaRMchF8=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6 h1:pE8b58s1HRDMi8RDc79m0HISf9D4TzseP40cEA6IGfs=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -180,14 +226,24 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191224085550-c709ea063b76 h1:Dho5nD6R3PcW2SH1or8vS0dszDaXRxIw55lBX7XiE5g=
golang.org/x/sys v0.0.0-20191224085550-c709ea063b76/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 h1:sfkvUWPNGwSV+8/fNqctR5lS2AqCSqYwXdrjCxp/dXo=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
@@ -203,21 +259,39 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20191224055732-dd894d0a8a40 h1:UyP2XDSgSc8ldYCxAK735zQxeH3Gd81sK7Iy7AoaVxk=
golang.org/x/tools v0.0.0-20191224055732-dd894d0a8a40/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200226180945-26f6a1b6802d h1:pUSEBYeASep5mmhgY5ZgD6zz3TDJ4SWJwaepxO+tJog=
golang.org/x/tools v0.0.0-20200226180945-26f6a1b6802d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.14.0 h1:uMf5uLi4eQMRrMKhCplNik4U4H8Z6C1br3zOtAa/aDE=
google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.15.0 h1:yzlyyDW/J0w8yNFJIhiAJy4kq74S+1DOLdawELNxFMA=
google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.17.0 h1:0q95w+VuFtv4PAx4PZVQdBMmYbaCHbnfKaEiDIcVyag=
google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.19.0 h1:GwFK8+l5/gdsOYKz5p6M4UK+QT8OvmHWZPJCnf+5DjA=
google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
@@ -232,26 +306,43 @@ google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRn
google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191223191004-3caeed10a8bf h1:1x8rC5/IgdLMPbPTvlQTN28+rcy8XL9Q19UWUMDyqYs=
google.golang.org/genproto v0.0.0-20191223191004-3caeed10a8bf/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce h1:1mbrb1tUU+Zmt5C94IGKADBTJZjZXAd+BubWi7r9EiI=
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200225123651-fc8f55426688 h1:1+0Z5cgv1eDXJD9z2tdQF9PSSQnJXwism490hJydMRI=
google.golang.org/genproto v0.0.0-20200225123651-fc8f55426688/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg=
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3 h1:sXmLre5bzIR6ypkjXCDI3jHPssRhc8KD/Ome589sc3U=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fsnil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -41,17 +42,33 @@ type Backup struct {
// Run runs b with the provided settings.
func (b *Backup) Run() error {
startTime := time.Now()
concurrency := b.Concurrency
src := b.Src
dst := b.Dst
origin := b.Origin
if origin != nil && origin.String() == dst.String() {
origin = nil
}
if origin == nil {
origin = &fsnil.FS{}
}
if err := dst.DeleteFile(fscommon.BackupCompleteFilename); err != nil {
return fmt.Errorf("cannot delete `backup complete` file at %s: %s", dst, err)
}
if err := runBackup(src, dst, origin, concurrency); err != nil {
return err
}
if err := dst.CreateFile(fscommon.BackupCompleteFilename, []byte("ok")); err != nil {
return fmt.Errorf("cannot create `backup complete` file at %s: %s", dst, err)
}
return nil
}
func runBackup(src *fslocal.FS, dst common.RemoteFS, origin common.OriginFS, concurrency int) error {
startTime := time.Now()
logger.Infof("starting backup from %s to %s using origin %s", src, dst, origin)
logger.Infof("obtaining list of parts at %s", src)
@@ -149,8 +166,8 @@ func (b *Backup) Run() error {
}
}
logger.Infof("backed up %d bytes in %s; deleted %d bytes; server-side copied %d bytes; uploaded %d bytes",
backupSize, time.Since(startTime), deleteSize, copySize, uploadSize)
logger.Infof("backed up %d bytes in %.3f seconds; deleted %d bytes; server-side copied %d bytes; uploaded %d bytes",
backupSize, time.Since(startTime).Seconds(), deleteSize, copySize, uploadSize)
return nil
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -29,6 +30,11 @@ type Restore struct {
// If dst points to existing directory, then incremental restore is performed,
// i.e. only new data is downloaded from src.
Dst *fslocal.FS
// SkipBackupCompleteCheck may be set in order to skip for `backup complete` file in Src.
//
// This may be needed for restoring from old backups with missing `backup complete` file.
SkipBackupCompleteCheck bool
}
// Run runs r with the provided settings.
@@ -48,6 +54,18 @@ func (r *Restore) Run() error {
concurrency := r.Concurrency
src := r.Src
dst := r.Dst
if !r.SkipBackupCompleteCheck {
ok, err := src.HasFile(fscommon.BackupCompleteFilename)
if err != nil {
return err
}
if !ok {
return fmt.Errorf("cannot find %s file in %s; this means either incomplete backup or old backup; "+
"pass -skipBackupCompleteCheck command-line flag if you still need restoring from this backup", fscommon.BackupCompleteFilename, src)
}
}
logger.Infof("starting restore from %s to %s", src, dst)
logger.Infof("obtaining list of parts at %s", src)
@@ -164,7 +182,8 @@ func (r *Restore) Run() error {
}
}
logger.Infof("restored %d bytes from backup in %s; deleted %d bytes; downloaded %d bytes", backupSize, time.Since(startTime), deleteSize, downloadSize)
logger.Infof("restored %d bytes from backup in %.3f seconds; deleted %d bytes; downloaded %d bytes",
backupSize, time.Since(startTime).Seconds(), deleteSize, downloadSize)
return nil
}

View File

@@ -38,4 +38,13 @@ type RemoteFS interface {
// UploadPart must upload part p from r to RemoteFS.
UploadPart(p Part, r io.Reader) error
// DeleteFile deletes filePath at RemoteFS
DeleteFile(filePath string) error
// CreateFile creates filePath at RemoteFS and puts data into it.
CreateFile(filePath string, data []byte) error
// HasFile returns true if filePath exists at RemoteFS.
HasFile(filePath string) (bool, error)
}

View File

@@ -260,3 +260,11 @@ func removeEmptyDirsInternal(d *os.File) (bool, error) {
}
return true, nil
}
// IgnorePath returns true if the given path must be ignored.
func IgnorePath(path string) bool {
return strings.HasSuffix(path, ".ignore")
}
// BackupCompleteFilename is a filename, which is created in the destination fs when backup is complete.
const BackupCompleteFilename = "backup_complete.ignore"

View File

@@ -3,6 +3,7 @@ package fsremote
import (
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
@@ -48,6 +49,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
if !strings.HasPrefix(file, dir) {
logger.Panicf("BUG: unexpected prefix for file %q; want %q", file, dir)
}
if fscommon.IgnorePath(file) {
continue
}
var p common.Part
if !p.ParseFromRemotePath(file[len(dir):]) {
logger.Infof("skipping unknown file %s", file)
@@ -188,3 +192,42 @@ func (fs *FS) mkdirAll(filePath string) error {
func (fs *FS) path(p common.Part) string {
return p.RemotePath(fs.Dir)
}
// DeleteFile deletes filePath at fs.
//
// The function does nothing if the filePath doesn't exist.
func (fs *FS) DeleteFile(filePath string) error {
path := filepath.Join(fs.Dir, filePath)
err := os.Remove(path)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("cannot remove %q: %s", path, err)
}
return nil
}
// CreateFile creates filePath at fs and puts data into it.
//
// The file is overwritten if it exists.
func (fs *FS) CreateFile(filePath string, data []byte) error {
path := filepath.Join(fs.Dir, filePath)
if err := ioutil.WriteFile(path, data, 0600); err != nil {
return fmt.Errorf("cannot write %d bytes to %q: %s", len(data), path, err)
}
return nil
}
// HasFile returns true if filePath exists at fs.
func (fs *FS) HasFile(filePath string) (bool, error) {
path := filepath.Join(fs.Dir, filePath)
fi, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return false, nil
}
return false, fmt.Errorf("cannot stat %q: %s", path, err)
}
if fi.IsDir() {
return false, fmt.Errorf("%q is directory, while file is needed", path)
}
return true, nil
}

View File

@@ -8,6 +8,7 @@ import (
"cloud.google.com/go/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
@@ -97,6 +98,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
if !strings.HasPrefix(file, dir) {
return nil, fmt.Errorf("unexpected prefix for gcs key %q; want %q", file, dir)
}
if fscommon.IgnorePath(file) {
continue
}
var p common.Part
if !p.ParseFromRemotePath(file[len(dir):]) {
logger.Infof("skipping unknown object %q", file)
@@ -187,3 +191,56 @@ func (fs *FS) object(p common.Part) *storage.ObjectHandle {
path := p.RemotePath(fs.Dir)
return fs.bkt.Object(path)
}
// DeleteFile deletes filePath at fs if it exists.
//
// The function does nothing if the filePath doesn't exists.
func (fs *FS) DeleteFile(filePath string) error {
path := fs.Dir + filePath
o := fs.bkt.Object(path)
ctx := context.Background()
if err := o.Delete(ctx); err != nil {
if err != storage.ErrObjectNotExist {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
}
}
return nil
}
// CreateFile creates filePath at fs and puts data into it.
//
// The file is overwritten if it exists.
func (fs *FS) CreateFile(filePath string, data []byte) error {
path := fs.Dir + filePath
o := fs.bkt.Object(path)
ctx := context.Background()
w := o.NewWriter(ctx)
n, err := w.Write(data)
if err != nil {
_ = w.Close()
return fmt.Errorf("cannot upload %d bytes to %q at %s (remote path %q): %s", len(data), filePath, fs, o.ObjectName(), err)
}
if n != len(data) {
_ = w.Close()
return fmt.Errorf("wrong data size uploaded to %q at %s (remote path %q); got %d bytes; want %d bytes", filePath, fs, o.ObjectName(), n, len(data))
}
if err := w.Close(); err != nil {
return fmt.Errorf("cannot close %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
}
return nil
}
// HasFile returns ture if filePath exists at fs.
func (fs *FS) HasFile(filePath string) (bool, error) {
path := fs.Dir + filePath
o := fs.bkt.Object(path)
ctx := context.Background()
_, err := o.Attrs(ctx)
if err != nil {
if err == storage.ErrObjectNotExist {
return false, nil
}
return false, fmt.Errorf("unexpected error when obtaining attributes for %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
}
return true, nil
}

View File

@@ -1,14 +1,17 @@
package s3remote
import (
"bytes"
"context"
"fmt"
"io"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
@@ -113,6 +116,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
errOuter = fmt.Errorf("unexpected prefix for s3 key %q; want %q", file, dir)
return false
}
if fscommon.IgnorePath(file) {
continue
}
var p common.Part
if !p.ParseFromRemotePath(file[len(dir):]) {
logger.Infof("skipping unknown object %q", file)
@@ -220,6 +226,78 @@ func (fs *FS) UploadPart(p common.Part, r io.Reader) error {
return nil
}
// DeleteFile deletes filePath from fs if it exists.
//
// The function does nothing if the file doesn't exist.
func (fs *FS) DeleteFile(filePath string) error {
// It looks like s3 may return `AccessDenied: Access Denied` instead of `s3.ErrCodeNoSuchKey`
// on an attempt to delete non-existing file.
// so just check whether the filePath exists before deleting it.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/284 for details.
ok, err := fs.HasFile(filePath)
if err != nil {
return err
}
if !ok {
// Missing file - nothing to delete.
return nil
}
path := fs.Dir + filePath
input := &s3.DeleteObjectInput{
Bucket: aws.String(fs.Bucket),
Key: aws.String(path),
}
if _, err := fs.s3.DeleteObject(input); err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %s", filePath, fs, path, err)
}
return nil
}
// CreateFile creates filePath at fs and puts data into it.
//
// The file is overwritten if it already exists.
func (fs *FS) CreateFile(filePath string, data []byte) error {
path := fs.Dir + filePath
sr := &statReader{
r: bytes.NewReader(data),
}
input := &s3manager.UploadInput{
Bucket: aws.String(fs.Bucket),
Key: aws.String(path),
Body: sr,
}
_, err := fs.uploader.Upload(input)
if err != nil {
return fmt.Errorf("cannot upoad data to %q at %s (remote path %q): %s", filePath, fs, path, err)
}
l := int64(len(data))
if sr.size != l {
return fmt.Errorf("wrong data size uploaded to %q at %s; got %d bytes; want %d bytes", filePath, fs, sr.size, l)
}
return nil
}
// HasFile returns true if filePath exists at fs.
func (fs *FS) HasFile(filePath string) (bool, error) {
path := fs.Dir + filePath
input := &s3.GetObjectInput{
Bucket: aws.String(fs.Bucket),
Key: aws.String(path),
}
o, err := fs.s3.GetObject(input)
if err != nil {
if ae, ok := err.(awserr.Error); ok && ae.Code() == s3.ErrCodeNoSuchKey {
return false, nil
}
return false, fmt.Errorf("cannot open %q at %s (remote path %q): %s", filePath, fs, path, err)
}
if err := o.Body.Close(); err != nil {
return false, fmt.Errorf("cannot close %q at %s (remote path %q): %s", filePath, fs, path, err)
}
return true, nil
}
func (fs *FS) path(p common.Part) string {
return p.RemotePath(fs.Dir)
}

View File

@@ -11,9 +11,9 @@ import (
var (
// Verify ByteBuffer implements the given interfaces.
_ io.Writer = &ByteBuffer{}
_ fs.ReadAtCloser = &ByteBuffer{}
_ io.ReaderFrom = &ByteBuffer{}
_ io.Writer = &ByteBuffer{}
_ fs.MustReadAtCloser = &ByteBuffer{}
_ io.ReaderFrom = &ByteBuffer{}
// Verify reader implement filestream.ReadCloser interface.
_ filestream.ReadCloser = &reader{}
@@ -36,8 +36,8 @@ func (bb *ByteBuffer) Write(p []byte) (int, error) {
return len(p), nil
}
// ReadAt reads len(p) bytes starting from the given offset.
func (bb *ByteBuffer) ReadAt(p []byte, offset int64) {
// MustReadAt reads len(p) bytes starting from the given offset.
func (bb *ByteBuffer) MustReadAt(p []byte, offset int64) {
if offset < 0 {
logger.Panicf("BUG: cannot read at negative offset=%d", offset)
}

View File

@@ -218,7 +218,7 @@ func TestByteBufferRead(t *testing.T) {
}
}
func TestByteBufferReadAt(t *testing.T) {
func TestByteBufferMustReadAt(t *testing.T) {
testStr := "foobar baz"
var bb ByteBuffer
@@ -232,7 +232,7 @@ func TestByteBufferReadAt(t *testing.T) {
t.Fatalf("expecting non-nil error when reading at negative offset")
}
}()
bb.ReadAt(p, -1)
bb.MustReadAt(p, -1)
}()
// Try reading past the end of buffer
@@ -242,18 +242,18 @@ func TestByteBufferReadAt(t *testing.T) {
t.Fatalf("expecting non-nil error when reading past the end of buffer")
}
}()
bb.ReadAt(p, int64(len(testStr))+1)
bb.MustReadAt(p, int64(len(testStr))+1)
}()
// Try reading the first byte
n := len(p)
bb.ReadAt(p, 0)
bb.MustReadAt(p, 0)
if string(p) != testStr[:n] {
t.Fatalf("unexpected value read: %q; want %q", p, testStr[:n])
}
// Try reading the last byte
bb.ReadAt(p, int64(len(testStr))-1)
bb.MustReadAt(p, int64(len(testStr))-1)
if string(p) != testStr[len(testStr)-1:] {
t.Fatalf("unexpected value read: %q; want %q", p, testStr[len(testStr)-1:])
}
@@ -266,18 +266,18 @@ func TestByteBufferReadAt(t *testing.T) {
}
}()
p := make([]byte, 10)
bb.ReadAt(p, int64(len(testStr))-3)
bb.MustReadAt(p, int64(len(testStr))-3)
}()
// Try reading multiple bytes from the middle
p = make([]byte, 3)
bb.ReadAt(p, 2)
bb.MustReadAt(p, 2)
if string(p) != testStr[2:2+len(p)] {
t.Fatalf("unexpected value read: %q; want %q", p, testStr[2:2+len(p)])
}
}
func TestByteBufferReadAtParallel(t *testing.T) {
func TestByteBufferMustReadAtParallel(t *testing.T) {
ch := make(chan error, 10)
var bb ByteBuffer
bb.B = []byte("foo bar baz adsf adsf dsakjlkjlkj2l34324")
@@ -285,7 +285,7 @@ func TestByteBufferReadAtParallel(t *testing.T) {
go func() {
p := make([]byte, 3)
for i := 0; i < len(bb.B)-len(p); i++ {
bb.ReadAt(p, int64(i))
bb.MustReadAt(p, int64(i))
}
ch <- nil
}()

52
lib/envflag/envflag.go Normal file
View File

@@ -0,0 +1,52 @@
package envflag
import (
"flag"
"log"
"os"
"strings"
)
var enable = flag.Bool("envflag.enable", false, "Whether to enable reading flags from environment variables additionally to command line. "+
"Command line flag values have priority over values from environment vars. "+
"Flags are read only from command line if this flag isn't set")
// Parse parses environment vars and command-line flags.
//
// Flags set via command-line override flags set via environment vars.
//
// This function must be called instead of flag.Parse() before using any flags in the program.
func Parse() {
flag.Parse()
if !*enable {
return
}
// Remember explicitly set command-line flags.
flagsSet := make(map[string]bool)
flag.Visit(func(f *flag.Flag) {
flagsSet[f.Name] = true
})
// Obtain the remaining flag values from environment vars.
flag.VisitAll(func(f *flag.Flag) {
if flagsSet[f.Name] {
// The flag is explicitly set via command-line.
return
}
// Get flag value from environment var.
fname := getEnvFlagName(f.Name)
if v, ok := os.LookupEnv(fname); ok {
if err := f.Value.Set(v); err != nil {
// Do not use lib/logger here, since it is uninitialized yet.
log.Fatalf("cannot set flag %s to %q, which is read from environment variable %q: %s", f.Name, v, fname, err)
}
}
})
}
func getEnvFlagName(s string) string {
// Substitute dots with underscores, since env var names cannot contain dots.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/311#issuecomment-586354129 for details.
return strings.ReplaceAll(s, ".", "_")
}

View File

@@ -212,7 +212,7 @@ func newWriter(f *os.File, nocache bool) *Writer {
// MustClose syncs the underlying file to storage and then closes it.
func (w *Writer) MustClose() {
if err := w.bw.Flush(); err != nil {
logger.Panicf("FATAL: cannot flush data to file %q: %s", w.f.Name(), err)
logger.Panicf("FATAL: cannot flush buffered data to file %q: %s", w.f.Name(), err)
}
putBufioWriter(w.bw)
w.bw = nil
@@ -253,6 +253,14 @@ func (w *Writer) Write(p []byte) (int, error) {
return n, nil
}
// MustFlush flushes all the buffered data to file.
func (w *Writer) MustFlush() {
if err := w.bw.Flush(); err != nil {
logger.Panicf("FATAL: cannot flush buffered data to file %q: %s", w.f.Name(), err)
}
// Do not call w.f.Sync() for performance reasons.
}
type statWriter struct {
*os.File
}

28
lib/flagutil/array.go Normal file
View File

@@ -0,0 +1,28 @@
package flagutil
import (
"flag"
"strings"
)
// NewArray returns new Array with the given name and descprition.
func NewArray(name, description string) *Array {
var a Array
flag.Var(&a, name, description)
return &a
}
// Array holds an array of flag values
type Array []string
// String implements flag.Value interface
func (a *Array) String() string {
return strings.Join(*a, ",")
}
// Set implements flag.Value interface
func (a *Array) Set(value string) error {
values := strings.Split(value, ",")
*a = append(*a, values...)
return nil
}

View File

@@ -0,0 +1,34 @@
package flagutil
import (
"flag"
"os"
"testing"
)
var fooFlag Array
func init() {
os.Args = append(os.Args, "--fooFlag=foo", "--fooFlag=bar")
flag.Var(&fooFlag, "fooFlag", "test")
}
func TestMain(m *testing.M) {
flag.Parse()
os.Exit(m.Run())
}
func TestArray(t *testing.T) {
expected := map[string]struct{}{
"foo": {},
"bar": {},
}
if len(expected) != len(fooFlag) {
t.Errorf("len array flag (%d) is not equal to %d", len(fooFlag), len(expected))
}
for _, i := range fooFlag {
if _, ok := expected[i]; !ok {
t.Errorf("unexpected item in array %v", i)
}
}
}

34
lib/fs/copy_mmap_cgo.go Normal file
View File

@@ -0,0 +1,34 @@
// +build cgo
package fs
// #cgo CFLAGS: -O3
//
// #include <stdint.h> // for uintptr_t
// #include <string.h> // for memcpy
//
// // The memcpy_wrapper allows avoiding memory allocations during calls from Go.
// // See https://github.com/golang/go/issues/24450 .
// static void memcpy_wrapper(uintptr_t dst, uintptr_t src, size_t n) {
// memcpy((void*)dst, (void*)src, n);
// }
import "C"
import (
"runtime"
"unsafe"
)
// copyMmap copies len(dst) bytes from src to dst.
func copyMmap(dst, src []byte) {
// Copy data from mmap'ed src via cgo call in order to protect from goroutine stalls
// when the copied data isn't available in RAM, so the OS triggers reading the data from file.
// See https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d for details.
dstPtr := C.uintptr_t(uintptr(unsafe.Pointer(&dst[0])))
srcPtr := C.uintptr_t(uintptr(unsafe.Pointer(&src[0])))
C.memcpy_wrapper(dstPtr, srcPtr, C.size_t(len(dst)))
// Prevent from GC'ing src or dst during C.memcpy_wrapper call.
runtime.KeepAlive(src)
runtime.KeepAlive(dst)
}

12
lib/fs/copy_mmap_nocgo.go Normal file
View File

@@ -0,0 +1,12 @@
// +build !cgo
package fs
// copyMmap copies len(dst) bytes from src to dst.
func copyMmap(dst, src []byte) {
// This may lead to goroutines stalls when the copied data isn't available in RAM.
// In this case the OS triggers reading the data from file.
// See https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d for details.
// TODO: fix this
copy(dst, src)
}

View File

@@ -4,9 +4,7 @@ import (
"os"
)
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
//
// if prefetch is set, then the OS is hinted to prefetch f data.
func MustFadviseSequentialRead(f *os.File, prefetch bool) {
func fadviseSequentialRead(f *os.File, prefetch bool) error {
// TODO: implement this properly
return nil
}

View File

@@ -3,22 +3,20 @@
package fs
import (
"fmt"
"os"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"golang.org/x/sys/unix"
)
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
//
// if prefetch is set, then the OS is hinted to prefetch f data.
func MustFadviseSequentialRead(f *os.File, prefetch bool) {
func fadviseSequentialRead(f *os.File, prefetch bool) error {
fd := int(f.Fd())
mode := unix.FADV_SEQUENTIAL
if prefetch {
mode |= unix.FADV_WILLNEED
}
if err := unix.Fadvise(int(fd), 0, 0, mode); err != nil {
logger.Panicf("FATAL: error returned from unix.Fadvise(%d): %s", mode, err)
return fmt.Errorf("error returned from unix.Fadvise(%d): %s", mode, err)
}
return nil
}

View File

@@ -10,69 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"golang.org/x/sys/unix"
)
// ReadAtCloser is rand-access read interface.
type ReadAtCloser interface {
// ReadAt must read len(p) bytes from offset off to p.
ReadAt(p []byte, off int64)
// MustClose must close the reader.
MustClose()
}
// ReaderAt implements rand-access read.
type ReaderAt struct {
f *os.File
}
// ReadAt reads len(p) bytes from off to p.
func (ra *ReaderAt) ReadAt(p []byte, off int64) {
if len(p) == 0 {
return
}
n, err := ra.f.ReadAt(p, off)
if err != nil {
logger.Panicf("FATAL: cannot read %d bytes at offset %d of file %q: %s", len(p), off, ra.f.Name(), err)
}
if n != len(p) {
logger.Panicf("FATAL: unexpected number of bytes read; got %d; want %d", n, len(p))
}
readCalls.Inc()
readBytes.Add(len(p))
}
// MustClose closes ra.
func (ra *ReaderAt) MustClose() {
if err := ra.f.Close(); err != nil {
logger.Panicf("FATAL: cannot close file %q: %s", ra.f.Name(), err)
}
readersCount.Dec()
}
// OpenReaderAt opens a file on the given path for random-read access.
//
// The file must be closed with MustClose when no longer needed.
func OpenReaderAt(path string) (*ReaderAt, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
readersCount.Inc()
ra := &ReaderAt{
f: f,
}
return ra, nil
}
var (
readCalls = metrics.NewCounter(`vm_fs_read_calls_total`)
readBytes = metrics.NewCounter(`vm_fs_read_bytes_total`)
readersCount = metrics.NewCounter(`vm_fs_readers`)
)
// MustSyncPath syncs contents of the given path.
func MustSyncPath(path string) {
d, err := os.Open(path)

128
lib/fs/reader_at.go Normal file
View File

@@ -0,0 +1,128 @@
package fs
import (
"flag"
"fmt"
"os"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"golang.org/x/sys/unix"
)
var disableMmap = flag.Bool("fs.disableMmap", false, "Whether to use pread() instead of mmap() for reading data files")
// MustReadAtCloser is rand-access read interface.
type MustReadAtCloser interface {
// MustReadAt must read len(p) bytes from offset off to p.
MustReadAt(p []byte, off int64)
// MustClose must close the reader.
MustClose()
}
// ReaderAt implements rand-access reader.
type ReaderAt struct {
f *os.File
mmapData []byte
}
// MustReadAt reads len(p) bytes at off from r.
func (r *ReaderAt) MustReadAt(p []byte, off int64) {
if len(p) == 0 {
return
}
if len(r.mmapData) == 0 || len(p) > 8*1024 {
// Read big blocks directly from file.
// This could be faster than reading these blocks from mmap,
// since it triggers less page faults.
n, err := r.f.ReadAt(p, off)
if err != nil {
logger.Panicf("FATAL: cannot read %d bytes at offset %d of file %q: %s", len(p), off, r.f.Name(), err)
}
if n != len(p) {
logger.Panicf("FATAL: unexpected number of bytes read; got %d; want %d", n, len(p))
}
} else {
if off < 0 || off > int64(len(r.mmapData)-len(p)) {
logger.Panicf("off=%d is out of allowed range [0...%d] for len(p)=%d", off, len(r.mmapData)-len(p), len(p))
}
copyMmap(p, r.mmapData[off:])
}
readCalls.Inc()
readBytes.Add(len(p))
}
// MustClose closes r.
func (r *ReaderAt) MustClose() {
fname := r.f.Name()
if len(r.mmapData) > 0 {
if err := unix.Munmap(r.mmapData); err != nil {
logger.Panicf("FATAL: cannot unmap data for file %q: %s", fname, err)
}
r.mmapData = nil
}
MustClose(r.f)
r.f = nil
readersCount.Dec()
}
// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
//
// if prefetch is set, then the OS is hinted to prefetch f data.
func (r *ReaderAt) MustFadviseSequentialRead(prefetch bool) {
if err := fadviseSequentialRead(r.f, prefetch); err != nil {
logger.Panicf("FATAL: error in fadviseSequentialRead(%q, %v): %s", r.f.Name(), prefetch, err)
}
}
// OpenReaderAt opens ReaderAt for reading from filename.
//
// MustClose must be called on the returned ReaderAt when it is no longer needed.
func OpenReaderAt(path string) (*ReaderAt, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("cannot open file %q for reader: %s", path, err)
}
var r ReaderAt
r.f = f
if !*disableMmap {
data, err := mmapFile(f)
if err != nil {
MustClose(f)
return nil, fmt.Errorf("cannot init reader for %q: %s", path, err)
}
r.mmapData = data
}
r.MustFadviseSequentialRead(false)
readersCount.Inc()
return &r, nil
}
var (
readCalls = metrics.NewCounter(`vm_fs_read_calls_total`)
readBytes = metrics.NewCounter(`vm_fs_read_bytes_total`)
readersCount = metrics.NewCounter(`vm_fs_readers`)
)
func mmapFile(f *os.File) ([]byte, error) {
fi, err := f.Stat()
if err != nil {
return nil, fmt.Errorf("error in stat: %s", err)
}
size := fi.Size()
if size == 0 {
return nil, nil
}
if size < 0 {
return nil, fmt.Errorf("got negative file size: %d bytes", size)
}
if int64(int(size)) != size {
return nil, fmt.Errorf("file is too big to be mmap'ed: %d bytes", size)
}
data, err := unix.Mmap(int(f.Fd()), 0, int(size), unix.PROT_READ, unix.MAP_SHARED)
if err != nil {
return nil, fmt.Errorf("cannot mmap file with size %d: %s", size, err)
}
return data, nil
}

Some files were not shown because too many files have changed in this diff Show More