Compare commits

...

443 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
fe736c5388 docs/CHANGELOG.md: cut v1.87.0 2023-02-01 13:03:11 -08:00
Aliaksandr Valialkin
607b542222 vendor: make vendor-update 2023-02-01 12:23:23 -08:00
Aliaksandr Valialkin
8b9ebf625a lib/promscrape: add a comment explaining the logic behind adding exported_ perfix to metric names
This is a follow-up for 7b87fac8e7

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3557
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3406
2023-02-01 12:00:52 -08:00
Dmytro Kozlov
7b87fac8e7 lib/promscrape: fix honor_labels behavior (#3739)
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-02-01 11:21:44 -08:00
Aliaksandr Valialkin
7b1caf1db3 docs/CHANGELOG.md: document 9254e494f9 2023-02-01 09:56:52 -08:00
Nikolay
9254e494f9 lib/storage: fixes finalDedup for backfilled data (#3737)
previously historical data backfilling may trigger force merge for previous month every hour
it consumes cpu, disk io and decrease cluster performance.
Following commit fixes it by applying deduplication for InMemoryParts
2023-02-01 09:54:21 -08:00
Zakhar Bessarab
68985455f1 fix: vmselect multi-level setup panic (#3738)
* app/vmselect/netstorage: fix panic for multi-level cluster setup when `replicationFactor` was set and request contained `trace` parameter (#3734)

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* app/vmselect/netstorage: use correct context for retry

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

---------

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-02-01 08:59:30 -08:00
Zakhar Bessarab
4cf37c5e70 app/vmbackup: fix deleting snapshot after backup completion (#3735) (#3736)
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-02-01 11:23:58 +01:00
Aliaksandr Valialkin
3d331e4c5d app/vmselect/vmui: make vmui-update after dcc5616126 2023-01-31 13:24:43 -08:00
Aliaksandr Valialkin
4ecb61e247 docs/CHANGELOG.md: document 442a9f16b4
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3661
2023-01-31 13:03:46 -08:00
Denys Holius
442a9f16b4 Makefile: adds i386 architecture (#3725)
see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3661
2023-01-31 12:58:53 -08:00
Yury Molodov
dcc5616126 vmui: improvement the theme (#3731)
* feat: add detect the system theme

* fix: change logic fetch tenants

* feat: add docs and info to cardinality page

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-31 12:54:59 -08:00
Aliaksandr Valialkin
080a3e2396 vendor: make vendor-update 2023-01-31 11:03:20 -08:00
Aliaksandr Valialkin
ac8bc77688 lib/bytesutil/internstring.go: increase the limit on the maximum string lengths, which can be interned
The limit has been increased from 300 bytes to 500 bytes according to the collected production stats.
This allows reducing CPU usage without significant increase of RAM usage in most practical cases.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3692
2023-01-31 10:56:55 -08:00
Roman Khavronenko
1cbdcd391c docs: mention -vmalert.proxyURL in vmalert docs (#3730)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-30 16:28:33 +01:00
Aliaksandr Valialkin
0788be35eb lib/promscrape/discovery/azure: add __meta_azure_machine_size label in the same way as Prometheus does
See https://github.com/prometheus/prometheus/pull/11650
2023-01-27 17:07:12 -08:00
Aliaksandr Valialkin
ab57b92932 lib/promscrape/discovery/kubernetes: add support for __meta_kubernetes_pod_container_id
See https://github.com/prometheus/prometheus/issues/11843
and https://github.com/prometheus/prometheus/pull/11844
2023-01-27 16:34:06 -08:00
Aliaksandr Valialkin
6a7faf9f22 vendor: make vendor-update 2023-01-27 15:57:38 -08:00
Yury Molodov
ac14d50c18 vmui: add select of Tenant ID (#3673)
* feat: add select of tenantID

* feat: replace tenantID to default url

* fix: move the tenantID selector to the top header

* fix: hide tenantID selector by condition

* fix: correct z-index

* app/vmselect/vmui: `make vmui-update`

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-27 15:53:14 -08:00
Aliaksandr Valialkin
51ad94677c docs: update security chapters after bd716d1b0c 2023-01-27 15:45:35 -08:00
Denys Holius
bd716d1b0c Improving docs by adding additional security sections (#3713)
* docs/Cluster-VictoriaMetrics.md: adds security section

* docs/Quick-Start.md: adds Security recommendation section
2023-01-27 15:22:21 -08:00
Aliaksandr Valialkin
06f6b76521 app/vmagent: properly return 200 response code when importing data via Prometheus PushGateway protocol
This is the same fix as has been already applied to app/vminsert at cdb6d651e9

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1415
2023-01-27 14:40:02 -08:00
Aliaksandr Valialkin
a0c8b86eab docs/vmauth.md: update docs after ff39a91147
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3346
2023-01-27 14:10:19 -08:00
Aliaksandr Valialkin
ff39a91147 app/vmauth: limit the number of concurrent requests served by vmauth with the -maxConcurrentRequests command-line flag
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3346

This commit is based on the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3486
2023-01-27 14:07:30 -08:00
Aliaksandr Valialkin
372b1688d7 app/vmauth: do not use net/http/httputil.ReverseProxy
This allows better controlling requests to backends and providing better error logging.
For example, if the backend was unavailable, then the ReverseProxy was logging the error
message without client ip and the initial request uri. This could harden debugging.

This is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3486
2023-01-27 13:40:05 -08:00
Aliaksandr Valialkin
1b81d8f542 lib/netutil: move IsTrivialNetworkError() function there, since it is used in multiple places across the code 2023-01-27 13:24:30 -08:00
Aliaksandr Valialkin
7e355080ce app/vmauth: pass the target url to reverse proxy via context.Value instead of request header
This is less hacky way, since it doesn't clash with request headers
2023-01-27 12:15:52 -08:00
Aliaksandr Valialkin
2afa4ae00a docs/managed-victoriametrics: typo fix in links to images 2023-01-27 11:36:20 -08:00
Aliaksandr Valialkin
6342eb5523 docs/assets/README.md: mention that locally placed doc-specific images simplify referring them from various views without the need to deal with folder prefixes 2023-01-27 11:29:23 -08:00
Aliaksandr Valialkin
54a0ccbaca docs/managed-victoriametrics/user-management.md: move the associated images to docs/managed-victoriametrics/ folder with user-management_ prefix according to docs/assets/README.md 2023-01-27 11:25:22 -08:00
Aliaksandr Valialkin
b28cf0faa8 docs/managed-victoriametrics/quickstart.md: move the associated images to docs/managed-victoriametrics/ folder with quickstart_ prefix according to docs/assets/README.md 2023-01-27 11:16:03 -08:00
Aliaksandr Valialkin
3251d392b5 docs/assets: add README.md with the explanation on which files can be put into the docs/assets folder 2023-01-27 11:02:16 -08:00
Aliaksandr Valialkin
119010f7f2 docs/Cluster-VictoriaMetrics.md: move Naive_cluster_scheme.png from the docs/assets/images/ folder into docs/ folder and add Cluster-VictoriaMetrics_ prefix to the image name
The docs/assets folder should be used only for assets specific to docs generation at https://docs.victoriametrics.com, e.g. css, js and images.

All the other assets related to specific docs should be placed in the same folder as the corresponding *.md file.
These assets should have the same name prefix as the corresponding doc file name. This simplifies tracking the lifetime of these assets.
For example, if the doc is removed, it is very easy to remove all assets associated with it with a simple `rm -rf docs/doc-name*` command.

This also simplifies generating correct urls for doc-specific assets from both https://docs.victoriametrics.com
and from https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/ - just refer to the asset name without any directory prefixes.
2023-01-27 10:54:27 -08:00
Aliaksandr Valialkin
eedb294754 lib/netutil: typo fix in the error message 2023-01-27 10:38:38 -08:00
dmitryk-dk
4250b67fe8 docs: move how to register from dbaas to docs 2023-01-27 14:07:09 +02:00
dmitryk-dk
465c889f7f docs: use absolute path 2023-01-27 14:07:09 +02:00
dmitryk-dk
834c18a346 docs: add documentation of user management on managed-vm 2023-01-27 14:07:09 +02:00
Aliaksandr Valialkin
36941d6d75 app/vmauth: consistency renaming: UserInfo.URLMap -> UserInfo.URLMaps
This is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3486
2023-01-27 00:19:02 -08:00
Aliaksandr Valialkin
558165521b docs/Cluster-VictoriaMetrics.md: update command-line descriptions after ebebaecd94 2023-01-27 00:04:41 -08:00
Aliaksandr Valialkin
0890adde67 docs: update command-line descriptions after 73256fe438 2023-01-27 00:00:37 -08:00
Aliaksandr Valialkin
28d92a2f31 lib/netutil: limit the time needed for reading proxy protocol headers
This should prevent from misconfigured proxies and from possible Slowloris-type DoS attacks
(see https://en.wikipedia.org/wiki/Slowloris_(computer_security) )

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3335
2023-01-26 23:46:51 -08:00
Aliaksandr Valialkin
cb374677a9 app/vmagent/prometheusimport: delete the temporary directory created by vmagent after the test is complete
This is a follow-up for 1cfa183c2b
2023-01-26 23:21:24 -08:00
Nikolay
73256fe438 lib/netutil: init implimentation of proxy protocol (#3687)
* lib/netutil: init implimentation of proxy protocol
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3335

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-26 23:08:35 -08:00
Aliaksandr Valialkin
2c1419c687 docs/CHANGELOG.md: make the description for the bugfix from 465a285324 more reader-friendly 2023-01-26 10:08:13 -08:00
Nikolay
465a285324 lib/storage: properly release parts inMerge lock (#3711)
if storage doesn't have enough disk space, finalDedupWatcher holds inMerge lock for all parts and never release it until storage restart
2023-01-26 08:05:20 -08:00
Roman Khavronenko
95ee86b600 docs: specify the time window for series_limit (#3708)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-25 09:30:20 -08:00
Aliaksandr Valialkin
28f66f0079 docs: update the list of command-line flags according to the latest changes 2023-01-25 09:20:24 -08:00
Aliaksandr Valialkin
d655d6b047 lib/streamaggr: add ability to de-duplicate input samples before aggregation 2023-01-25 09:14:49 -08:00
Yury Molodov
99d49e3ceb vmui: include fonts in its bundle (#3705)
* feat: include fonts in the build

* fix: reduce size fonts

* wip

- Document the change at docs/CHANGELOG.md
- Run `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-24 09:30:56 -08:00
Yury Molodov
20ad848c5d vmui: improvements to the UI styles (#3704)
* feat: add dark theme

* update packages

* feat: add multilevel menu (#3678)

* fix: correct styles

* fix: update link to cardinality-explorer

* fix: remove unused scss variables

* docs/CHANGELOG.md: document the changes

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-24 09:20:31 -08:00
Roman Khavronenko
1a8875b417 discover/ec2: follow-up after e2b4ab8384 (#3703)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-24 11:02:18 +01:00
Roman Khavronenko
c7c4786f3f discover/ec2: bump API version (#3702)
Switch to the actual API version `2016-11-15`,
since the old version doesn't provide access to all
the fields which implementation expects.
For example, old API missing `zone_id` field
in `DescribeAvailabilityZonesResponse` response.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3700

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-24 10:42:55 +01:00
Aliaksandr Valialkin
a971bcc3fe lib/bytesutil: do not intern long strings, since they may need big amounts of additional memory for the cache
Allow users fine-tuning the maximum string length for interning via -internStringMaxLen command-line flag.
This may be used for fine-tuning RAM vs CPU usage for certain workloads.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3692
2023-01-23 23:36:22 -08:00
Aliaksandr Valialkin
0fd440cdb4 deployment: sync with cluster branch 2023-01-23 22:59:00 -08:00
Aliaksandr Valialkin
c496f06ca3 app/vmagent/{promremotewrite,vmimport}: remove unused functions InsertHandlerForReader()
Thanks to 1cfa183c2b , where the first such function has been removed
2023-01-23 22:31:29 -08:00
Aliaksandr Valialkin
f7acdb13db app/{vmagent,vminsert}: follow-up for 1cfa183c2b
- Call httpserver.GetQuotedRemoteAddr() and httpserver.GetRequestURI() only when the error occurs.
  This saves CPU time on fast path when there are no parsing errors.
- Create a helper function - httpserver.LogError() - for logging the error with the request uri and remote addr context.
2023-01-23 22:26:53 -08:00
Artem Navoiev
1cfa183c2b add error handler for parsing prometheus text format to vmagent and v… (#3693)
* add error handler for parsing prometheus text format to vmagent and vminsert

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

* fix typo

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

* typo

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

* fix variables naming and error message

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-23 22:14:34 -08:00
Yury Molodov
3536bef36e vmui: add open graph and twitter card tags (#3697)
* feat: add open graph and twitter card tags

* app/vmui: spelling fixes

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-23 22:04:46 -08:00
Aliaksandr Valialkin
babecd8363 lib/promscrape: follow-up for 393876e52a
- Document the change in docs/CHANGELOG.md
- Reduce memory usage when sending stale markers even more by parsing the response in stream parsing mode
- Update the TestSendStaleSeries

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3675
2023-01-23 21:52:59 -08:00
Roman Khavronenko
393876e52a lib/promscrape: limit number of sent stale series at once (#3686)
Stale series are sent when there is a difference between current
and previous scrapes. Those series which disappeared in the current scrape
are marked as stale and sent to the remote storage.

Sending stale series requires memory allocation and in case when too many
series disappear in the same it could result in noticeable memory spike.
For example, re-deploy of a big fleet of service can result into
excessive memory usage for vmagent, because all the series with old
pod name will be marked as stale and sent to the remote write storage.

This change limits the number of stale series which can be sent at once,
so memory usage remains steady.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3675
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-23 21:15:59 -08:00
Aliaksandr Valialkin
2c4e384f07 lib/promscrape: properly log the actual response size after c4229a1bba 2023-01-23 21:04:50 -08:00
Aliaksandr Valialkin
ba5a6c851c lib/storage: use deterministic random generator in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 20:10:32 -08:00
Aliaksandr Valialkin
1a3a6ef907 lib/mergeset: use deterministic random generator in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 19:43:49 -08:00
Aliaksandr Valialkin
7030429958 lib/mergeset: fix data race in BenchmarkInmemoryBlockMarshal 2023-01-23 19:43:18 -08:00
Aliaksandr Valialkin
30e968df6d app/vmselect: use consistent randomizer in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 19:27:25 -08:00
Aliaksandr Valialkin
f2b40dbe9a app/vmalert: use consistent randomizer in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 19:25:10 -08:00
Aliaksandr Valialkin
a11dc6689a lib/decimal: use consistent randomizer in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 19:23:39 -08:00
Aliaksandr Valialkin
0a4d8dc777 lib/uint64set: use repeatable randomizer in tests
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 19:22:58 -08:00
Aliaksandr Valialkin
3d1cb011b6 lib/encoding: make deterministic tests which rely on math/rand
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3683
2023-01-23 18:41:09 -08:00
Aliaksandr Valialkin
a7f8ce5e3d vendor: make vendor-update 2023-01-23 08:05:54 -08:00
Artem Navoiev
7c1daade15 tests: use DebugFlush instead of vmstorage stop. This simplifies the logic and allows to remove test-only methodds (#3694)
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-23 14:45:59 +01:00
Denys Holius
f3cb412508 Fix/remove vmanomaly from release guide (#3699)
* docs/Release-Guide.md: remove vmanomaly from release guide because it has own release cycle

* fixed a typo
2023-01-23 14:32:26 +01:00
Aliaksandr Valialkin
edc51b3119 docs/Articles.md: added missing third-party articles about VictoriaMetrics 2023-01-22 14:23:45 -08:00
Aliaksandr Valialkin
cb5d073bc9 docs/Single-server-VictoriaMetrics.md: make it clear that VictoriaMetrics supports both pull and push protocols at how to import time series data chapter 2023-01-22 14:04:29 -08:00
Aliaksandr Valialkin
864c651c03 docs/Articles.md: add https://dev.to/aws-builders/ultra-monitoring-with-victoria-metrics-1p2 2023-01-22 13:51:53 -08:00
Aliaksandr Valialkin
4e25bc2087 lib/vmselectapi: propagate timeout errors from vmselect to vmstorage instead of closing the connection established from vmselect to vmstorage
This is a follow-up for 20e9598254
2023-01-20 19:33:42 -08:00
Aliaksandr Valialkin
74df30456b app/vmselect: make vmui-update after df7b81b44d 2023-01-20 12:07:13 -08:00
Yury Molodov
df7b81b44d vmui: add support for time zone selection for older versions of browsers (#3680)
* fix: add check for support of getting time zones

* vmui: add support for time zone selection for older versions of browsers
2023-01-20 11:47:53 -08:00
Denys Holius
d513230d43 Adds some improvements to release guide docs (#3679)
* docs/Release-Guide.md: fixed a typo

* Release-Guide.md: adds missed steps for updating vmanomaly and vmgateway helm charts
2023-01-19 16:03:42 +01:00
Max Golionko
e8554cd1cb ci: checkout correct branch for build step (#3676) 2023-01-19 08:34:20 +01:00
Aliaksandr Valialkin
59b67f1cfa snap: update Go builder from v1.19.4 to v1.19.5 2023-01-18 14:05:18 -08:00
Aliaksandr Valialkin
adeec6e369 deployment/docker: update VictoriaMetrics components in docker-compose from v1.86.0 to v1.86.2 2023-01-18 12:57:39 -08:00
Aliaksandr Valialkin
9785b3f57f docs/CHANGELOG.md: cut v1.86.2 2023-01-18 12:01:07 -08:00
Aliaksandr Valialkin
9c62391a5c .github/workflows: remove obsolete make targets: install-goling and install-errcheck
These targets became obsolete after ec2c82e800
2023-01-18 11:48:29 -08:00
Max Golionko
59b97f26c0 CI: split js and go codeql, split test and build, enable matrix for test (#3670)
* split js and go codeql, split test and build, enable matrix for test

* checkout before go setup

* enable build for PRs as well

* update filter
2023-01-18 11:42:27 -08:00
Aliaksandr Valialkin
19e28ce7b6 docs/CHANGELOG.md: document 777038fe44
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3672
2023-01-18 11:39:48 -08:00
Tobias Jungel
777038fe44 app/vmbackup: prevent password leaks (#3672)
This prevents vmbackup from leaking passwords into logs like shown below.

2023-01-11T15:00:01.050Z        info    VictoriaMetrics/lib/logger/flag.go:12   build version: vmbackup-20221214-211706-tags-v1.85.1-0-g09a70d3e9
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/lib/logger/flag.go:13   command-line flags
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/lib/logger/flag.go:20     -dst="fs:///vm-backups/latest"
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/lib/logger/flag.go:20     -snapshot.createURL="http://user:super_sercret123@victoriametricspshot/create"
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/lib/logger/flag.go:20     -storageDataPath="/storage"
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/app/vmbackup/main.go:53 Snapshot create url http://user:super_sercret123@victoriametrics:8428/snapshot/create
2023-01-11T15:00:01.050Z        info    VictoriaMetrics/app/vmbackup/main.go:60 Snapshot delete url http://user:super_sercret123@victoriametrics:8428/snapshot/delete
2023-01-18 11:35:21 -08:00
Aliaksandr Valialkin
e867df5ef5 app/vmui: increase perceived performance by 2.5x by reducing the delay before the query execution from 0.8s to 0.3s
The delay cannot be removed, since it is used for limiting the rate of queries sent to VictoriaMetrics during graph scrolling.
2023-01-18 01:33:48 -08:00
Aliaksandr Valialkin
2ac530eb28 lib/{storage,mergeset}: wake up background merges as soon as there is a potential work for them
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3647
2023-01-18 01:10:18 -08:00
Aliaksandr Valialkin
b8409d6600 lib/{storage,mergeset}: do not run assisted merges when flushing pending samples to parts
Assisted merges are intended to be performed by goroutines, which accept the incoming samples,
in order to limit the data ingestion rate.

The worker, which converts pending samples to parts, shouldn't be penalized by assisted merges,
since this may result in increased number of pending rows as seen at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3647#issuecomment-1385039142
when the assisted merge takes too much time.
2023-01-18 00:20:58 -08:00
Aliaksandr Valialkin
1ac025bbc9 lib/storage: use better naming for a function returning new []rawRows - newRawRowsBlock() -> newRawRows() 2023-01-18 00:01:03 -08:00
Aliaksandr Valialkin
0c625185cb app/vmselect/promql: updates tests for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664 2023-01-17 23:25:45 -08:00
Aliaksandr Valialkin
68463c9e87 lib/promscrape: follow-up for d79f1b106c
- Document the fix at docs/CHANGELOG.md
- Limit the concurrency for sendStaleMarkers() function in order to limit its memory usage
  when big number of targets disappear and staleness markers are sent
  for all the metrics exposed by these targets.
- Make sure that the writeRequestCtx is returned to the pool
  when there is no need to send staleness markers.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
2023-01-17 23:11:56 -08:00
lzfhust
d79f1b106c using writeRequestCtxPool when delete kubernetes clusters from kubernetes_sd_configs (#3669) 2023-01-17 22:57:56 -08:00
Zakhar Bessarab
322d96bfe5 discovery/{consul,nomad}: fix cancelling serviceWatcher in-flight requests (#3658)
* lib/promscrape/discovery/{consul,nomad}: fix background service update watches not canceling requests on serviceWatcher stop

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* lib/promscrape/discovery/{consul,nomad}: fix closing serviseWatcher during scrape job restart

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* wip

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-17 21:47:11 -08:00
Scott Kevill
46b3b76d6d lib/fs: use unix.Statfs() / unix.Statvfs() when using a path (#3663) 2023-01-17 21:19:26 -08:00
Roman Khavronenko
5cb8ce8174 ci: disable JS codeQL check (#3659)
We have limited amount of time used by Github CI runners
and JS analysis accounts for a half of it.
Since JS represents only a small fraction of the codebase
and is solely maintained by one person - I suggest to disable
the CodeQL check in order to save CI runners time.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-17 21:05:27 -08:00
Yury Molodov
fcef2ff6b2 vmui: correctly display range results in Table view (#3657)
* fix: properly display range results

* fix: set range values to empty array

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-17 21:03:28 -08:00
Aliaksandr Valialkin
1ffa793322 docs/CHANGELOG.md: fix the link to feature request implemented at e58921aa8f
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3571
2023-01-17 20:27:43 -08:00
Yury Molodov
e58921aa8f vmui: give more visually different colors to graph lines (#3656)
* feat: make more different colors of graph lines

* docs/CHANGELOG.md: give more visually different colors to graph lines
2023-01-17 20:25:37 -08:00
Aliaksandr Valialkin
c94020f7dc app/vmui: do not round the number in formatPrettyNumber() if the range isn't set 2023-01-17 19:50:44 -08:00
Aliaksandr Valialkin
006af394ff vendor: update github.com/VictoriaMetrics/metricsql from v0.51.1 to v0.51.2
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664
2023-01-17 11:26:41 -08:00
Aliaksandr Valialkin
289af65071 lib/promscrape: properly apply series limit
Fix the following issues:

- Series limit wasn't applied when staleness tracking was disabled.
- Series limit didn't prevent from sending staleness markers for new series exceeding the limit.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3660

Thanks to @hagen1778 for the initial attempt to fix the issue
at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3665
2023-01-17 10:14:49 -08:00
Aliaksandr Valialkin
e06168f489 docs/CHANGELOG.md: document the 74f196c37813a18c2c8f28831696ed7d9f535604 2023-01-17 09:10:41 -08:00
Aliaksandr Valialkin
09d7fa2737 lib/{mergeset,storage}: do not slow down concurrently executed queries during assisted merges
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3647
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3641
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/648
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/291
2023-01-16 14:31:52 -08:00
Aliaksandr Valialkin
094ae82df5 vendor: make vendor-update 2023-01-15 14:16:34 -08:00
Aliaksandr Valialkin
092e2c8f2d github.com/VictoriaMetrics/metrics: update from v1.23.0 to v1.23.1
See https://github.com/VictoriaMetrics/metrics/issues/42
2023-01-15 14:06:43 -08:00
Yury Molodov
04c408e986 vmui: make the step input field global across all the tabs and views (#3644)
* feat: make the step input field global

* fix: correct get step from url

* fix: set minimumSignificantDigits to 1

* app/vmselect/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-15 13:47:08 -08:00
Aliaksandr Valialkin
cdb6d651e9 app/vminsert: return 200 OK status code when importing data in pushgateway format
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1415
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
2023-01-15 13:29:53 -08:00
Aliaksandr Valialkin
207a62a3c2 app/vmselect/promql: reduce memory allocations when searching for time series pairs with identical labelsets in q1 op q2 queries
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3641
2023-01-15 13:03:23 -08:00
Aliaksandr Valialkin
27afe7bc38 app/vmselect/promql: reduce the number of memory allocations inside getCommonLabelFilters()
This should improve performance a bit for `q1 op q2` queries
2023-01-15 13:03:23 -08:00
Denys Holius
ffe6e6fe59 guides/guide-delete-or-replace-metrics.md: fixed wrong curl command (#3652) 2023-01-15 13:02:52 -08:00
Yury Molodov
7fd82c0d3a feat: make nav menu as links (#3646) 2023-01-13 09:45:07 +01:00
Corporte Gadfly
f32a79189b docs: fix typo (#3648) 2023-01-13 09:39:11 +01:00
Aliaksandr Valialkin
be8fba9b6a app/vmselect/netstorage: tune the number of blocks per series which should be unpacked by a single goroutine instead of spinning up multiple goroutines
This reduces overhead on time series data unpacking for typical cases,
this reducing CPU usage at vmselect
2023-01-12 09:31:44 -08:00
Aliaksandr Valialkin
98de06ff38 docs/CHANGELOG.md: update the description of the change at 20f28eb9d6 2023-01-12 08:58:52 -08:00
Nikolay
20f28eb9d6 /lib/promscrape: use correct err logger for scrape unmarshalling (#3645)
/lib/promscrape: use correct err logger for scrape unmarshalling
It correctly suppresses scrape errors and adds correct context for err msg
2023-01-12 17:40:42 +01:00
Roman Khavronenko
ec7c3f45ba dashboards: bump operator dash to v9 of Grafana (#3642)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-12 16:31:26 +01:00
Aliaksandr Valialkin
7067e8206c app/vmselect/promql: reduce memory allocations at getCommonLabelFilters() function
Intern tag keys and values there
2023-01-12 01:27:41 -08:00
Aliaksandr Valialkin
e2498af530 lib/promscrape: log the number of unsuccessful scrapes during the last -promscrape.suppressScrapeErrorsDelay
This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3413
Thanks to @jelmd for the pull request.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2575
2023-01-12 01:09:32 -08:00
Aliaksandr Valialkin
9f5b5708ff app/vmselect: handle the /custom-dashboards request from /graph/ page in the same way as from the /vmui/ page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3322
2023-01-11 23:41:04 -08:00
Aliaksandr Valialkin
9fdd1a10c6 docs/Cluster-VictoriaMetrics.md: mention the -vmui.customDashboardsPath command-line flag at vmselect
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3322
2023-01-11 23:39:55 -08:00
Aliaksandr Valialkin
a194982117 app/vmselect: follow-up after 820312a2b1
- Move the feature description at the correct place at docs/CHANGELOG.md
- Run `make vmui-update`
- Various cosmetic fixes

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3322
2023-01-11 23:28:00 -08:00
Dmytro Kozlov
820312a2b1 app/vmui: define custom path for dashboards json file (#3545)
* app/vmui: define custom path for dashboards json file

* app/vmui: remove unneeded code

* app/vmui: move handler to own file, fix show dashboards,

* app/vmui: move flag to handler, add flag description

* app/vmauth: fix part of the comments

* feat: add store for dashboards

* fix: prevent fetch dashboards for app mode

* app/vmauth: use simple cache for predefined dashboards

* app/vmauth: update dashboards doc

* app/vmauth: fix ci

* app/vmui: decrease timeout

* app/vmselect: removed cache, fix comments

* app/vmselect: remove unused const

* app/vmselect: fix error log, use slice byte instead of struct

Co-authored-by: Yury Moladau <yurymolodov@gmail.com>
2023-01-11 23:06:07 -08:00
Aliaksandr Valialkin
ec23ab6bc2 lib/promscrape/discovery: missing changes after b4ad3a3b4c 2023-01-11 23:02:45 -08:00
Dmytro Kozlov
20af29294e app/vmctl: add remote read protocol integration tests (#3626) 2023-01-11 23:00:10 -08:00
Aliaksandr Valialkin
b4ad3a3b4c lib/promscrape: follow-up for 8537533beb
- Add a comment describing the purpose of the `role` field inside `apiConfig` struct
- Revert changes at lib/promscrape/discovery/dockerswarm/dockerswarm.go ,
  since they reduce code readability. E.g. the reader needs to look up the named string constants
  in order to get their values.
2023-01-11 22:54:18 -08:00
Zakhar Bessarab
8537533beb lib/promscrape/discovery/dockerswarm: fix discovery filters being applied to all objects (#3632)
* lib/promscrape/discovery/dockerswarm: fix discovery filters being applied to all objects

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* Update docs/CHANGELOG.md

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-11 22:50:34 -08:00
Yury Molodov
5c8bb029b5 vmui: small changes on explore metrics page (#3634)
* fix: change issue link

* fix: remove legend toggle

* fix: move select graph size

* feat: save url params on explore metrics page

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-11 22:16:10 -08:00
Artem Navoiev
13a1a7f826 fix operator docs hugo schema
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-11 17:28:34 +01:00
Artem Navoiev
83c87f822a docs: prepare operator docs to migration
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-11 18:19:40 +02:00
Artem Navoiev
54aec2b1ba docs: operator *.MD -> *.md
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-11 16:29:29 +01:00
Roman Khavronenko
b3a70b8284 dasbhoards: fix the tooltip info for 1.86 (#3628)
See c63755c316 (diff-bba263a473e7fbc9d0fde075ebef6b3d4e32c322ee1210a3e07182292c7723aaR18)

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-11 11:30:12 +01:00
Aliaksandr Valialkin
351fc152e0 docs/CHANGELOG.md: cut v1.86.1 2023-01-11 01:26:43 -08:00
Aliaksandr Valialkin
975bb8722f lib/vmselectapi: properly calculate query timeout
vmselect passes query timeout to vmstorage in seconds.
The commit 20e9598254 treated it as timeout in nanoseconds.
Fix this in order to prevent from the following errors under vmstorage load:

cannot process vmselect request: cannot execute "search_v7": couldn't start executing the request in 0.000 seconds,
since -search.maxConcurrentRequests=... concurrent requests are already executed.
2023-01-11 01:22:33 -08:00
Roman Khavronenko
f73953a049 app/vmselect: fix typo (#3629)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-11 01:09:47 -08:00
Aliaksandr Valialkin
dff47c73b7 app/vmselect: improve logging when the incoming query cannot be executed because of timeout in the wait queue 2023-01-11 01:06:05 -08:00
Aliaksandr Valialkin
65b9dcfcca app/vmselect/promql: typo fix after 0771d57860 2023-01-11 01:05:31 -08:00
Aliaksandr Valialkin
0771d57860 app/vmselect/promql: make a copy of per-series timestamps before their modification
The per-series timestamps are usually shared among series, so it is unsafe modifying them.

The issue has been appeared after the optimization at 2f3ddd4884
2023-01-11 00:59:13 -08:00
Aliaksandr Valialkin
31fc29599f app/vmselect/promql: move the eval function args in parallel query trace outside the loop 2023-01-10 22:23:30 -08:00
Aliaksandr Valialkin
a0f9cb27f9 docs/CHANGELOG.md: document v1.79.7 LTS release 2023-01-10 21:24:28 -08:00
Aliaksandr Valialkin
4faf7ea41e vendor: make vendor-update 2023-01-10 18:58:34 -08:00
Aliaksandr Valialkin
c449714c0a deployment/docker: update Go builder from v1.19.4 to v1.19.5
See https://github.com/golang/go/issues?q=milestone%3AGo1.19.5+label%3ACherryPickApproved
2023-01-10 18:43:04 -08:00
Aliaksandr Valialkin
37fe04999c docs/CHANGELOG.md: add release date for v1.86.0 2023-01-10 17:45:57 -08:00
Aliaksandr Valialkin
eda940106a deployment/docker: update Docker tag for VictoriaMetrics components from v1.85.3 to v1.86.0 2023-01-10 17:26:37 -08:00
Aliaksandr Valialkin
28df7c2a96 docs/CHANGELOG.md: cut v1.86.0 2023-01-10 16:22:26 -08:00
Aliaksandr Valialkin
2d294cca59 deployment/docker: update Alpine base image from v3.17.0 to v3.17.1
See https://alpinelinux.org/posts/Alpine-3.17.1-released.html
2023-01-10 16:14:40 -08:00
Aliaksandr Valialkin
95ce1ba6ce lib/httpserver: directly pass flag value to CheckAuthFlag()
There is no sense in passing a pointer to flag value there.

This is a follow-up for 4225a0bd75
2023-01-10 15:52:23 -08:00
Zakhar Bessarab
4225a0bd75 Use httpAuth.* flags as a fallback for endpoints protected by *AuthKey flags (#3582)
* {lib/server, app/}: use `httpAuth.*` flag as fallback for `*AuthKey` if it is not set

* lib/ingestserver/opentsdbhttp: fix opentdb HTTP handler not respecting `httpAuth.*` flags

* Apply suggestions from code review

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-10 15:46:13 -08:00
Dmytro Kozlov
0811000bb0 app/vmctl: Add insecure skip verify flag for remote read protocol (#3611)
* app/vmctl: Add insecure skip verify flag for remote read protocol
2023-01-10 23:18:49 +01:00
Artem Navoiev
37c52ccaf4 vmagent: add minimal scrape file exampe for vmagent quick start (#3627)
* vmagent: add minimal scrape file exampe for vmagent quick start

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

* replace example with link to your prometheus.yml in docker

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-10 23:16:10 +01:00
Aliaksandr Valialkin
cbe62f23ba lib/promscrape/discovery/gce: follow-up for b2ccdaaa2f
- Use promutils.Labels.GetLabels() instead of comparing promutils.Labels.Labels to nil.
  This make the code more consistent with other places.

- Mention the release where the issue has been introduced at docs/CHANGELOG.md.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3624
2023-01-10 13:51:03 -08:00
Aliaksandr Valialkin
53d871d0b1 app/vmselect/netstorage: reduce tail latency during query processing
Previously the selected time series were split evenly among available CPU cores
for further processing - e.g unpacking the data and applying the given rollup
function to the unpacked data.
Some time series could be processed slower than others.
This could result in uneven work distribution among available CPU cores,
e.g. some CPU cores could complete their work sooner than others.
This could slow down query execution.

The new algorithm allows stealing time series to process from other CPU cores
when all the local work is done. This should reduce the maximum time
needed for query execution (aka tail latency).

The new algorithm should also scale better on systems with many CPU cores,
since every CPU processes locally assigned time series without inter-CPU communications.

The inter-CPU communications are used only when all the local work is finished
and the pending work from other CPUs needs to be stealed.
2023-01-10 13:43:14 -08:00
Zakhar Bessarab
b2ccdaaa2f lib/promscrape/discovery/gce: fix crash in case instance does not have any labels set (#3625)
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-01-10 11:07:11 +01:00
Denys Holius
b06e795a1e docs/Release-Guide.md: added missed link to rpm repository (#3623) 2023-01-10 10:14:56 +01:00
Aliaksandr Valialkin
e640ff72f1 app/vmselect/netstorage: reduce memory allocations when unpacking time series
Unpack time series with less than 400K samples in the currently running goroutine.
Previously a new goroutine was being started for unpacking the samples.
This was requiring additional memory allocations.
2023-01-09 23:18:17 -08:00
Aliaksandr Valialkin
9a563a6aef app/vmselect/promql: eliminate memory allocation when sorting values inside float64s 2023-01-09 23:06:46 -08:00
Aliaksandr Valialkin
30ed33fae0 app/vmselect/promql: pre-allocate memory for values to be merged in mergeTimeseries()
This should reduce the number of memory re-allocations
2023-01-09 22:51:17 -08:00
Aliaksandr Valialkin
645c24dc5f app/vmselect/promql: consistently intern series names obtained from marshalMetricNameSorted
This reduces memory allocations when the returned series names are used as map keys later
2023-01-09 22:45:40 -08:00
Aliaksandr Valialkin
2f3ddd4884 app/vmselect/promql: avoid memory allocations and copying from source timeseries to the returned result at timeseriesToResult() 2023-01-09 22:38:59 -08:00
Aliaksandr Valialkin
26cf680468 app/vmselect/promql: remove memory allocations from sortMetricTags() 2023-01-09 22:22:15 -08:00
Aliaksandr Valialkin
4f0c11ee93 app/vmselect/promql: intern output series names inside timeseriesToResult()
This reduces the number of memory allocations for repeated queries,
which return (almost) the same set of time series.
2023-01-09 22:19:56 -08:00
Aliaksandr Valialkin
562d6bca08 app/vmselect/promql: intern output series names during normal aggregation 2023-01-09 22:15:24 -08:00
Aliaksandr Valialkin
21ee9a1fab app/vmselect/promql: intern output series names during incremental aggregation
This should reduce the number of memory allocations for repeated queries
2023-01-09 22:11:36 -08:00
Aliaksandr Valialkin
df2a494a7c app/vmselect/netstorage: pre-allocate 4 block references per each time series during querying
Usually the number of blocks returned per each time series during queries is around 4.
So it is a good idea to pre-allocate 4 block references per time series
in order to reduce the number of memory allocations.
2023-01-09 22:03:23 -08:00
Aliaksandr Valialkin
c5e0f527bc app/vmselect/netstorage: cache canonical MetricName for time series returned from the storage
This reduces memory allocations for repeated queries, which return (almost) the same set of time series.
2023-01-09 21:53:10 -08:00
Aliaksandr Valialkin
7afcca0c51 all: use metricsql.CompileRegexp instead of regexp.Compile for compiling regexps used in graphite queries
This should speed up repeated queries, since metricsql.CompileRegexp returns regexps from the cache
on subsequent calls for the same input regexp.
2023-01-09 21:43:08 -08:00
Aliaksandr Valialkin
67ab49baa9 vendor: make vendor-update 2023-01-09 21:34:34 -08:00
Aliaksandr Valialkin
e5eca54951 lib/promscrape/discovery/nomad: sync nomad_sd_configs fields with the Prometheus implementation
See the list of configs supported by Prometheus at f88a0a7d83/discovery/nomad/nomad.go (L76-L84)

- Removed "token" option. In can be set either via NOMAD_TOKEN env var or via `bearer_token` config option.
- Removed "scheme" option. It is automatically detected depending on whether the `tls_config` is set.
- Removed "services" and "tags" options, since they aren't supported by Prometheus.
- Added "region" option. If it is missing, then the region is read from NOMAD_REGION env var.
  If this var is empty, then it is set to "global" in the same way as Nomad client does.
  See 865ee8d37c/api/api.go (L297)
  and 865ee8d37c/api/api.go (L555-L556)
- If the "server" option is missing, then it is read from NOMAD_ADDR in the same way
  as Nomad client does - see 865ee8d37c/api/api.go (L294-L296)

This is a follow-up for 8aee209c53

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3367
2023-01-09 21:14:48 -08:00
Aliaksandr Valialkin
c38a10e143 app/vmselect/netstorage: eliminate memory allocation for sortBlocksHeap arg when calling mergeSortBlocks() 2023-01-09 21:08:51 -08:00
Aliaksandr Valialkin
1f9d605988 app/vmselect/netstorage: consistently select the sample with the biggest value out of samples with identical timestamps
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333

This fix is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3620 ,
but doesn't slow down the common case with merging replicated data blocks so significantly.

Benchmark results:

Before the change:

BenchmarkMergeSortBlocks/replicationFactor-1-4         	   13968	     85643 ns/op	 956.53 MB/s	    1700 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-2-4         	   10806	    109171 ns/op	1500.77 MB/s	    2191 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-3-4         	    8887	    130623 ns/op	1881.45 MB/s	    2660 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-4-4         	    7440	    157348 ns/op	2082.52 MB/s	    3174 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-5-4         	    6534	    184473 ns/op	2220.38 MB/s	    3612 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4  	   13419	     85205 ns/op	 961.44 MB/s	    2213 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 	     579	   1894900 ns/op	  43.23 MB/s	   46760 B/op	       1 allocs/op

After the change:

BenchmarkMergeSortBlocks/replicationFactor-1-4         	   13832	     85298 ns/op	 960.40 MB/s	    1716 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-2-4         	    8833	    134222 ns/op	1220.66 MB/s	    2675 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-3-4         	    6487	    184830 ns/op	1329.65 MB/s	    3636 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-4-4         	    4977	    236318 ns/op	1386.61 MB/s	    4733 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/replicationFactor-5-4         	    4088	    296734 ns/op	1380.36 MB/s	    5761 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4  	   14083	     84067 ns/op	 974.47 MB/s	    2110 B/op	       1 allocs/op
BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 	     536	   2043534 ns/op	  40.09 MB/s	   50511 B/op	       1 allocs/op
2023-01-09 13:01:48 -08:00
Denys Holius
fe0e199859 deployment/docker: update Alertmanager tag from v0.24.0 to v0.25.0 in docker-compose files (#3619)
deployment/docker: bump alertmanager to latest v0.25.0
2023-01-09 12:37:14 +01:00
Roman Khavronenko
8aee209c53 lib/promscrape: remove datacenter field from nomad_sd_config (#3612)
Looks like `datacenter` field isn't part of `/v1/services` API.
See https://developer.hashicorp.com/nomad/api-docs/services#list-services
and https://developer.hashicorp.com/nomad/api-docs/services#read-service

Related issues:
https://github.com/traefik/traefik/issues/9109
https://github.com/prometheus/prometheus/issues/11776

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-09 09:07:40 +01:00
Aliaksandr Valialkin
28f8dc41b0 lib/promscrape/discoveryutils: cleanup after 5df9fddaf2
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468
2023-01-07 01:26:54 -08:00
Zakhar Bessarab
5df9fddaf2 lib/promscrape/discoveryutils: use correct timeout for blocking requests (#3609)
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-01-07 01:13:03 -08:00
Aliaksandr Valialkin
41e00a0df7 lib/storage: simplify the fix from 488940502c
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3566
2023-01-07 01:04:43 -08:00
Dmytro Kozlov
488940502c lib/storage: fix returning camelcase label names (#3608)
* lib/storage: fix returning camelcase label names

* doc: add change log

* Update docs/CHANGELOG.md

* Update docs/CHANGELOG.md

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-07 00:50:14 -08:00
Aliaksandr Valialkin
5fe7ff24c2 lib/streamaggr: limit the the number of concurrent flushes of the aggregate data to the exact number of available CPUs
This should reduce the maximum memory usage during concurrent flushes of the aggregate data
2023-01-07 00:18:51 -08:00
Aliaksandr Valialkin
ad5bfe3089 lib/promscrape: reduce the number of concurrently executed processScrapedData calls from 2x of the number of CPUs to the number of CPUs
This should reduce the maximum memory usage for processScrapedData() function by 2x.
The only part, which can be IO-bound in the processScrapedData() is pushData() call,
when it buffers data to persistent queue if the remote storage cannot keep up
with the data ingestion speed. In this case it is OK if the scrape pace will be limited.
2023-01-07 00:14:30 -08:00
Aliaksandr Valialkin
af263fe881 all: small improvements in error messages and command-line flag descriptions related to concurrency limiters 2023-01-07 00:11:44 -08:00
Aliaksandr Valialkin
45f39e291e lib/writeconcurrencylimiter: moved the error generation from incConcurrency() to the caller place 2023-01-06 23:45:58 -08:00
Aliaksandr Valialkin
986a05e18d lib/promscrape: limit the concurrency during parsing and relabeling the scraped samples
This should reduce memory usage when scraping big number of targets,
since this limits the summary memory usage during concurrent parsing and relabeling
by the number of available CPU cores.
2023-01-06 22:59:17 -08:00
Aliaksandr Valialkin
293e4dc77b app/{vminsert,vmstorage}: add comments on why storage.AddRows() is called without limiting the number of concurrent calls 2023-01-06 22:40:07 -08:00
Aliaksandr Valialkin
5c4bd4f7c1 lib/streamaggr: limit the number of concurrent flushes of aggregate metrics in order to limit memory usage 2023-01-06 22:39:13 -08:00
Aliaksandr Valialkin
c63755c316 lib/writeconcurrencylimiter: improve the logic behind -maxConcurrentInserts limit
Previously the -maxConcurrentInserts was limiting the number of established client connections,
which write data to VictoriaMetrics. Some of these connections could be idle.
Such connections do not consume big amounts of CPU and RAM, so there is a little sense in limiting
the number of such connections. So now the -maxConcurrentInserts command-line option
limits the number of concurrently executed insert requests, not including idle connections.

It is recommended removing -maxConcurrentInserts command-line option, since the default value
for this option should work good for most cases.
2023-01-06 22:20:19 -08:00
Aliaksandr Valialkin
f299d2ca1a lib/vmselectapi: limit the number of concurrently executed requests
This should prevent from out of memory errors when big number of vmselect
nodes send many concurrent requests to vmstorage

The limit can be controlled at vmstorage via the following command-line flags:
- search.maxConcurrentRequests
- search.maxQueueDuration

See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#resource-usage-limits
2023-01-06 22:11:34 -08:00
Aliaksandr Valialkin
e7637885a6 app/vmselect: improve error message when the request cannot be started because too many concurrent requests are already executed 2023-01-06 22:10:42 -08:00
Aliaksandr Valialkin
463b957e54 lib/promscrape/discovery/{consul,nomad}: wait until the deleted serviceWatchers are stopped inside updateServices() call
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3367
2023-01-05 21:52:33 -08:00
Aliaksandr Valialkin
f392913d00 lib/promscrape: follow-up after bced9fb978
- Document the bugfix at docs/CHANGELOG.md
- Wait until all the worker goroutines are done in consulWatcher.mustStop()
- Do not log `context canceled` errors when discovering consul serviceNames
- Removed explicit handling of gzipped responses at lib/promscrape/discoveryutils.Client,
  since this handling is automatically performed by net/http.Transport.
  See DisableCompression option at https://pkg.go.dev/net/http#Transport .
- Remove explicit handling of the proxyURL, since it is automatically handled
  by net/http.Transport. See Proxy option at https://pkg.go.dev/net/http#Transport .
- Expliticly set MaxIdleConnsPerHost, since its default value equals to 2.
  Such a small value may result in excess tcp connection churn
  when more than 2 concurrent requests are processed by lib/promscrape/discoveryutils.Client.
- Do not set explicitly the `Host` request header, since it is automatically set by net/http.Client.
- Backport the bugfix to the recently added nomad_sd_configs - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3367

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468
2023-01-05 21:13:06 -08:00
Zakhar Bessarab
bced9fb978 lib/promscrape/discoveryutils: switch to native http client from fasthttp (#3568) 2023-01-05 19:34:47 -08:00
Roman Khavronenko
5bdd880142 vmstorage: add more context to the flock acquiring msg (#3584)
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3578

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-05 18:30:42 -08:00
Aliaksandr Valialkin
9f348cf8a1 lib/promscrape/discovery/nomad: follow-up after 48f371a46c
- Remove undocumented `username` and `password` config options from `nomad_sd_config`.
  TODO: probably, remove these options from `consul_sd_config` too?
  These options exist there for backwards compatibility purposes.

- Add __meta_nomad_service_alloc_id and __meta_nomad_service_job_id meta-labels
  These labels contain AllocID and JobID fields for the discovered Nomad services.

- Various typo fixes.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3367
2023-01-05 18:07:20 -08:00
Aliaksandr Valialkin
cad8553c01 Makefile: remove trailing space after golangci-lint run command
It is left after ec2c82e800
2023-01-05 16:59:07 -08:00
Aliaksandr Valialkin
1a28f0e5b3 lib/promrelabel: pass query args via query string at /metric-relabel-debug and /target-relabel-debug pages if their length doesnt exceed 1000
This allows copy-n-pasting the url to another browser window and seeing the same result.

The limit in 1000 chars is selected in order to prevent from potential issues with systems
which limit the url length such as Internet Explorer - see https://stackoverflow.com/questions/812925/what-is-the-maximum-possible-length-of-a-query-string

If the limit is exceeded, then query args are sent via POST method and aren't visible in the url.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3580
2023-01-05 16:48:04 -08:00
Karan Sharma
48f371a46c lib/promscrape: add Prometheus-compatible service discovery for Nomad (#3549)
Add nomad_sd_config support for service discovery
2023-01-05 23:03:58 +01:00
Denys Holius
043b28c725 .github/workflows/nightly-build.yml: added dockerhub login (#3594) 2023-01-05 16:54:14 +01:00
Luke Palmer
ec2c82e800 Lint and errcheck using golangci-lint (#3558) 2023-01-05 16:12:46 +01:00
Zakhar Bessarab
01bc0c94ab doc: add vmbackupmanager monitoring section (#3605)
* doc: add vmbackupmanager monitoring section

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-01-05 16:03:06 +01:00
Thomas Danielsson
9d1104d812 dashboards: fix operator datasource variable (#3604)
Got "Failed to upgrade legacy queries Datasource $ds was not found" in
Grafana on operator dashboard.
It's datasource variable was incorrectly named `datasource`.

Also made the rest of the dashboards have homogeneous datasource-variable
names and selections, matching vmagent dashboard.
2023-01-05 14:59:56 +01:00
Artem Navoiev
8b763175ff Add Understand Your Setup Size Guide (#3572)
docs: add Understand Your Setup Size Guide

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-05 14:56:50 +01:00
Aliaksandr Valialkin
2ee81a5dbb docs/CHANGELOG.md: add missing dot 2023-01-05 03:35:02 -08:00
Zakhar Bessarab
185cdcd813 lib/promscrape/discovery/dockerswarm: fix query encoding of filters (#3586)
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-05 03:34:25 -08:00
Aliaksandr Valialkin
0dea3b71da lib/promscrape: pre-fetch metric_relabel_configs rules when debugging metric relabeling for a particular target
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3407
2023-01-05 03:26:49 -08:00
Aliaksandr Valialkin
a1076abcbf lib/promscrape: follow-up for a7e29c38bc
- Document the bugfix at docs/CHANGELOG.md
- Make the fix more durable against future changes when droppedTargetsMap.Register may be called from other places.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3580
2023-01-05 02:52:08 -08:00
Zakhar Bessarab
a7e29c38bc lib/promscrape/targetstatus: fix crash during droppedTarget registration (#3595)
* lib/promscrape/targetstatus: fix crash during droppedTarget registration in case original labels are not present

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* lib/promscrape/targetstatus: address review comment

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-01-05 02:39:31 -08:00
Yury Molodov
2460e0f51e vmui: improve Explore metrics (#3598)
* feat: add multiple select

* feat: improve explore interface

* app/vmselect/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-01-05 02:23:04 -08:00
Aliaksandr Valialkin
0e1f0ade31 lib/streamaggr: sort by and without labels in the aggregate output metric name
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460
2023-01-05 02:08:44 -08:00
Aliaksandr Valialkin
04dff34de4 vendor: update github.com/VictoriaMetrics/metricsql from v0.50.0 to v0.51.0
Updates https://github.com/VictoriaMetrics/metricsql/pull/7
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3589
2023-01-05 01:50:10 -08:00
Aliaksandr Valialkin
66947ee5a2 lib/streamaggr: remove unused fields 2023-01-04 13:33:46 -08:00
Roman Khavronenko
9d0e1f8e68 dashboards: add backupmanager dashboard (#3599)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-04 17:26:15 +01:00
Roman Khavronenko
63bf583b3c github: rm plaintext render (#3597)
`render: plain text` makes fields not formattable and prevents
 from pasting screenshots.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2023-01-04 14:48:17 +01:00
Denys Holius
09fe346d18 docs/Release-Guide.md: adds release scenario for RPM LTS packages (#3588) 2023-01-04 14:36:14 +01:00
Zakhar Bessarab
59f20c1034 github: use github templates for filling in feature requests or bug reports (#3587)
github: use github templates for filling in feature requests or bug reports
2023-01-04 14:34:19 +01:00
Artem Navoiev
0ff85d00a4 update year in License
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-04 09:46:38 +01:00
Aliaksandr Valialkin
fafece1af8 vendor: make vendor-update 2023-01-03 23:36:42 -08:00
Aliaksandr Valialkin
5bca3a5be2 app/vmselect: remove dependency on lib/promscrape from app/vmselect 2023-01-03 23:28:27 -08:00
Aliaksandr Valialkin
fd175ad80b docs: update -help outputs for vm* tools 2023-01-03 23:27:06 -08:00
Aliaksandr Valialkin
fa13bbc48a app/{vmagent,vminsert}: add support for streaming aggregation
See https://docs.victoriametrics.com/stream-aggregation.html

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460
2023-01-03 22:19:21 -08:00
Aliaksandr Valialkin
add2c4bf07 lib/bytesutil: add InternBytes() function as a shortcut to InternString(ToUnsafeString(..)) 2023-01-03 22:16:22 -08:00
Aliaksandr Valialkin
f33e687723 app/vmagent/remotewrite: improve descriptions for -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig
- Cross-reference these command-line flags.
- Add a link to https://docs.victoriametrics.com/vmagent.html#relabeling
2023-01-03 22:04:49 -08:00
Aliaksandr Valialkin
d3a1c36842 docs/Single-server-VictoriaMetrics.md: impromve formatting for prominent features chapter 2023-01-03 21:58:21 -08:00
Aliaksandr Valialkin
6db5c3801e app/vmbackup: remove superflouos whitespace after 8fe21ec707 2023-01-03 21:52:40 -08:00
Aliaksandr Valialkin
189f85b24c docs/vmbackupmanager.md: run make docs-sync after fa842d6534 2023-01-03 21:50:28 -08:00
Aliaksandr Valialkin
7b264b0c23 lib/promrelabel: allow calling Match on nil IfExpression
This simplifies the caller side of IfExpression
2023-01-03 21:44:03 -08:00
yanggang
8fe21ec707 Fix flag help message for the backups types. (#3577)
Signed-off-by: yanggang <gang.yang@daocloud.io>
2023-01-03 10:56:42 +01:00
yanggang
fa842d6534 fix typo for json values. (#3576)
Signed-off-by: yanggang <gang.yang@daocloud.io>
2023-01-03 10:55:38 +01:00
yanggang
93e935bcaa Fix vmctl command hint for vm-native-step-interval (#3575)
Signed-off-by: yanggang <gang.yang@daocloud.io>
2023-01-03 10:54:53 +01:00
Artem Navoiev
0a519c93ef run checks only for master/cluster branches (#3581)
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2023-01-03 11:08:44 +04:00
Roman Khavronenko
4b3d8eb573 vmalert: mention specifics of Alertmanager HA mode (#3573)
Stress the importance of specifying of all Alertmanager
URLs in vmalert's `-notifier.url` or `notifier.config`
if it runs in cluster mode.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3547

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-30 09:54:03 +01:00
Aliaksandr Valialkin
05fa11b296 app/vmui: reuse the series color in line tooltip 2022-12-29 15:32:41 -08:00
Aliaksandr Valialkin
1794f3d46e app/vmui: small usability improvements
- Show in the line tooltip the number of the query which generates the given line.
  This simplifies comparison of lines generated by multiple queries.

- Show metric name as __name__ label in the line tooltip in the same way as other labels are shown there.
  This makes the label information in the tooltip more consistent.

- Properly quote label values with JSON.stringify(). This prevents from improper formatting
  when label values contain doublequote chars.

- Remove double curly braces artifact at graph legend for lines without names and labels.

- Properly use modifier for regular expressions across the code.
2022-12-29 14:52:51 -08:00
Aliaksandr Valialkin
59e1e84a92 docs/CHANGELOG.md: document 1720bddb4f
Updats https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3530
2022-12-29 12:31:48 -08:00
Aliaksandr Valialkin
b8bc62431a app/vmselect/vmui: make vmui-update after 1720bddb4f 2022-12-29 12:20:06 -08:00
Yury Molodov
1720bddb4f fix: display correct graph tooltip title (#3562) 2022-12-29 12:06:48 -08:00
Roman Khavronenko
2cedb3e883 csvimport: support empty values (#3565)
Before, if the imported line contained multiple metrics and one
or more of them had an empty values - the whole line was ignored.

Now, only metrics with empty values are ignored, and the rest
of the metrics are accepted successfully.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3540

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-12-29 11:52:10 -08:00
Roman Khavronenko
83870aeb8d tests: attempt to fix flaky graphite test (#3567)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-29 11:48:47 -08:00
Aliaksandr Valialkin
7af8857b68 docs/CHANGELOG.md: add a link to the docs and the pull request for update_entries_limit option in vmalert
This is a follow up for 6588fcbfca
2022-12-29 10:38:26 -08:00
Aliaksandr Valialkin
c90752a8be vendor: update github.com/valyala/fastjson/fastfloat from v1.6.3 to v1.6.4
This should properly parse floating-point numbers with missing integer or fractional parts.
For example, 123. or .123

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3544
2022-12-29 10:34:11 -08:00
ChenyuanHu
0a9e1d64fe app/vmselect/prometheus: no need manually call queryDuration.UpdateDuration (#3564)
There is no need to manually call `queryDuration.UpdateDuration(startTime)`, because `defer queryDuration.UpdateDuration(startTime)` is executed at the beginning of the function(L660).
2022-12-29 14:18:00 +01:00
Roman Khavronenko
6588fcbfca vmalert: allow configuring the default number of stored rule's update states (#3556)
Allow configuring the default number of stored rule's update states in memory
 via global `-rule.updateEntriesLimit` command-line flag or per-rule via rule's
 `update_entries_limit` configuration param.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-29 12:36:44 +01:00
Aliaksandr Valialkin
3dc684634e app/vmselect/searchutils: accept partial RFC3339 values at time, start and end query args
This simplifies manual usage of the APIs. For example, the following query
would return the results over the 2022 year.

  /api/v1/query_range?start=2022&end=2023&step=1d&query=...

This is equivalent to:

  /api/v1/query_range?start=2022-01-01T00:00:00Z&end=2023-01-01T00:00:00Z&step=1d&query=...
2022-12-28 19:41:54 -08:00
Yury Molodov
d2f89b55b7 vmui: fix step field (#3561)
* feat: use a unit next to the step value

* app/vmselect/vmui: `make vmui-update`

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-12-28 16:00:51 -08:00
Aliaksandr Valialkin
3d082ed6db vendor: make vendor-update 2022-12-28 15:00:02 -08:00
Aliaksandr Valialkin
c4229a1bba lib/promscrape: log the actual response size in the error message when the response size exceeds -promscrape.maxScrapeSize
This is a follow-up for 7ad9fff7e5
2022-12-28 14:42:11 -08:00
Aliaksandr Valialkin
1b16118e17 lib/{storage,mergeset}: tune the threshold for assisted merge
The https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425#issuecomment-1359117221
reveals that CPU usage for incoming queries may significantly increase when the number
of in-memory parts becomes too big.

This commit reduces the maximum number of in-memory parts before starting the assisted merge
during data ingestion. This should reduce CPU usage for incoming queries,
since they need to inspect lower number of in-memory parts.

This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
2022-12-28 14:39:24 -08:00
Clément Nussbaumer
7ad9fff7e5 fix(promscrape): check MaxScrapeSize after gzip decompression (#3550) 2022-12-28 12:19:41 -08:00
Aliaksandr Valialkin
293dda7169 lib/snapshot: improve log message on unexpected status code during attempts to create or delete snapshots
Use "unexpected status code returned from %q: %d; expecting %d" log message format
instead of less clear format "unexpected status code returned from %q; expecting %d; got %d"

This is a follow-up for c612bb165e
2022-12-28 11:41:50 -08:00
Aliaksandr Valialkin
ed9161a04f docs: remove a case study for Dreamteam.gg, since it looks like the company no longer exists 2022-12-28 11:34:40 -08:00
Zakhar Bessarab
c612bb165e lib/snapshot: fix error message format for failed HTTP request (#3559) 2022-12-28 18:04:11 +01:00
Artem Navoiev
34c705988e fix broken links
Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2022-12-27 13:17:22 +02:00
Artem Navoiev
7d9c4bebc0 update links to grafana dashboards (#3534)
docs: update links to grafana dashboards

Signed-off-by: Artem Navoiev <tenmozes@gmail.com>
2022-12-25 17:36:20 +01:00
Aliaksandr Valialkin
b11c806d1c app/vmui: show min, max and avg lines at Explore metrics graphs when instance is selected in the same way as when only the job is selected
This improves consistency of the graphs.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386
2022-12-23 23:21:11 -08:00
Aliaksandr Valialkin
2a7e392bb3 app/vmselect/vmui: make vmui-update after 0dca224ec3 2022-12-23 22:25:04 -08:00
Aliaksandr Valialkin
0dca224ec3 app/vmui: small improvements for header panel
- Rename `Custom panel` tab to more clear `Query` tab
- Rename `Cardinality` tab to `Explore cardinality`, so it becomes consistent with `Explore metrics` tab
- Move `Dashboards` tab to the end, since it isn't used too much
2022-12-23 22:24:31 -08:00
Aliaksandr Valialkin
8ef1fe2047 app/vmui: move the Explore metrics tab closer to Custom panel tab
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386
2022-12-23 22:16:21 -08:00
Aliaksandr Valialkin
f599e1bd34 app/vmui: show less lines at metrics explorer when the instance isn't selected
Show min, max and avg graphs across instances for the selected job.
This should improve usability of such a graphs when the job contains many instances.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386
2022-12-23 22:15:01 -08:00
Aliaksandr Valialkin
4deea604bf app/vmui: follow-up after f6d31f5216
- Document the feature at docs/CHANGELOG.md.
- Document the metrics explorer at https://docs.victoriametrics.com/#metrics-explorer .
- Properly set `start` and `end` args for the selected time range
  when performing the request, which returns metric names.
- Improve queries, so they return lower number of lines and labels.
  This should improve metrics' exploration.
- Properly encode label filters and query args before passing them to VictoriaMetrics.
- Various cosmetic fixes.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386
2022-12-22 17:17:01 -08:00
Yury Molodov
f6d31f5216 vmui: add explore tab for exploration of metrics, which belong to a particular job/instance (#3470)
* feat: add "Explore" page

* feat: add graphs for explore page

* vmui: add explore tab for exploration of metrics, which belong to a particular job/instance

* refactor: rename variables

* refactor: extract graph to ExploreMetricItemGraph.tsx

* feat: add searchable for Select.tsx

* feat: improve metrics explorer

* feat: set document title by page

* feat: add page to view icons

* fix: improve styles

* fix: add encodeURIComponent to query
2022-12-22 15:24:40 -08:00
Aliaksandr Valialkin
f6ac045933 docs/CHANGELOG.md: document 1df87807fd
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3513
2022-12-22 15:22:09 -08:00
Yury Molodov
1df87807fd vmui: step (#3521)
* feat: add step rounding

* fix: change step in URL parameters

* refactor: change comment for roundStep
2022-12-22 14:54:28 -08:00
Aliaksandr Valialkin
0076422350 lib/promscrape/discovery/azure: typo fix 2022-12-21 21:25:16 -08:00
Aliaksandr Valialkin
f1441a598f app/vmselect/promql: add tests for d3de110070 2022-12-21 20:25:21 -08:00
Aliaksandr Valialkin
fa236c5a84 lib/promrelabel: make fmt after d3de110070 2022-12-21 20:24:57 -08:00
Aliaksandr Valialkin
d3de110070 app/vmselect/promql: make sure that label_replace() doesn't create an empty dst_label if the src_label doesn't match regex 2022-12-21 20:20:01 -08:00
Aliaksandr Valialkin
31886aef3d lib/promrelabel: add support for keepequal and dropequal relabeling actions
These actions are supported by Prometheus starting from v2.41.0

See https://github.com/prometheus/prometheus/pull/11564 ,
https://github.com/prometheus/prometheus/issues/11556
and https://github.com/prometheus/prometheus/issues/3756

Side note:

It's a pity that Prometheus developers decided inventing `keepequal` and `dropequal`
relabeling actions instead of adding support for `keep_if_equal` and `drop_if_equal` relabeling
actions supported by VictoriaMetrics since June 2020 - see 2a39ba639d .
2022-12-21 20:04:55 -08:00
Aliaksandr Valialkin
3300546eab lib/bytesutil: make sure that the cleanup code is performed only by a single goroutine out of many concurrently running goroutines
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3466
2022-12-21 13:07:24 -08:00
Yury Molodov
0f8ffc7df9 vmui: fix change timezone (#3519)
vmui: fix time picker with changed time zone
2022-12-21 17:22:37 +01:00
Aliaksandr Valialkin
192db0f0b1 deployment/docker: update VictoriaMetrics tag from v1.85.2 to v1.85.3 in docker-compose files 2022-12-20 15:34:23 -08:00
Aliaksandr Valialkin
f443fad56d docs/CHANGELOG.md: cut v1.85.3 2022-12-20 14:51:23 -08:00
Aliaksandr Valialkin
77874d6055 app/vmselect/vmui: make vmui-update after 731d189fa9 2022-12-20 14:51:07 -08:00
Roman Khavronenko
7ca49290b6 docs: fix the image link (#3509)
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3495
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-20 14:36:25 -08:00
Roman Khavronenko
e4e9dfb785 vmagent: respect -usePromCompatibleNaming if no relabeling is set (#3511)
* vmagent: respect `-usePromCompatibleNaming` if no relabeling is set

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3493

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* vmagent: upd test

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-12-20 14:32:45 -08:00
Yury Molodov
731d189fa9 fix: change the logic for hide query (#3514) 2022-12-20 14:29:46 -08:00
Zakhar Bessarab
4be4645142 app/vmbackupmanager: add metrics for better observability (#488)
* app/vmbackupmanager: add metrics for better observability, include more information to `/api/v1/backups` API call response

* app/vmbackupmanager: drop old metrics before creating new ones

* app/vmbackupmanager: use `_total` postfix for counter metrics

* app/vmbackupmanager: remove `_total` postfix for gauge-like metrics

* app/vmbackupmanager: add `_last_run_failed` metrics for backups and retention

* app/vmbackupmanager: address review feedback

* app/vmbackupmanager: fix metric name

* app/vmbackupmanager: address review feedback, remove background updates of metrics, add restoring state of `_last_run_failed` metric from remote storage

* app/vmbackupmanager: improve performance for backup size calculation

* app/vmbackupmanager: refactor backup and retention runs to deduplicate each run logic

* {app/vmbackupmanager,lib/formatutil}: move HumanizeBytes into lib package

* app/vmbackupmanager: fix creating new metrics instead of reusing existing ones

* lit/formatutil: add comment to make linter happy

* app/vmbackupmanager: address review feedback
2022-12-20 14:18:06 -08:00
Aliaksandr Valialkin
4e55b67a44 lib/storage: clear the err if it is set to io.EOF when searching for the TSID by metricID
This is expected error after when recently added indexdb data isn't available for search yet
or wasn't flushed to disk after unclean shutdown of VictoriaMetrics.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3515
2022-12-20 14:05:29 -08:00
Aliaksandr Valialkin
8f5e822565 Makefile: update golangci-lint version from v1.48.0 to v1.50.1 2022-12-20 13:09:40 -08:00
Aliaksandr Valialkin
cad90c7ac1 Makefile: publish release docker images at DockerHub with the stable tag
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2911
2022-12-20 12:27:06 -08:00
Aliaksandr Valialkin
a48510573e Revert "docs/Release-Guide.md: add LATEST_TAG=stable env var for make publish-release in order to create stable tag for the published components at DockerHub"
This reverts commit 8afa7ef837.
2022-12-20 12:24:27 -08:00
Aliaksandr Valialkin
8afa7ef837 docs/Release-Guide.md: add LATEST_TAG=stable env var for make publish-release in order to create stable tag for the published components at DockerHub
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2911
2022-12-20 12:22:42 -08:00
Aliaksandr Valialkin
1d7b5cb83c docs/CHANGELOG.md: document the change at 547f07463b29c09c62c9af35eac9cee6764b3286
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2612
2022-12-20 10:22:07 -08:00
Zakhar Bessarab
18e55d14c6 app/vmbackupmanager: update doc to include cluster to cluster restore example (#3506) 2022-12-20 14:54:56 +01:00
Roman Khavronenko
8122191368 docs: fix link typo in operator docs (#3508) 2022-12-20 14:52:47 +01:00
Aliaksandr Valialkin
6bf46c7bf5 docs/CHANGELOG.md: formatting fix 2022-12-20 01:06:34 -08:00
Aliaksandr Valialkin
9fa3f1dc57 app/vmselect/promql: do not extend too short lookbehind window for rate() function if it is set explicitly
Previously too short lookbehind window d for rate(m[d]) could be automatically extended
if it didn't cover at least two raw samples. This was needed in order to guarantee
non-empty results from rate(m[d]) on short time ranges.

Now the lookbehind window isn't extended if it is set explicitly,
since it is expected that the user knows what he is doing.

The lookbehind window continues to be extended when needed if it isn't set explicitly.
For example, in the case of rate(m).

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3483
2022-12-20 00:18:20 -08:00
Aliaksandr Valialkin
eabb8762ee docs/Articles.md: add a link to https://www.youtube.com/watch?v=Mesc6JBFNhQ 2022-12-19 21:40:51 -08:00
Michal Kralik
fd53f86c84 build: fix issue with missing docker scan (#3501) 2022-12-19 15:22:45 -08:00
Aliaksandr Valialkin
680925f872 docs/CHANGELOG.md: add a warning for releases between v1.83.0 and v1.85.1 that it is recommended upgrading to v1.85.2 because of the https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3502
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3502
2022-12-19 13:36:49 -08:00
Aliaksandr Valialkin
d2ad184377 docs/CHANGELOG.md: consistently use YYYY-MM-DD format for release dates
The previously used DD-MM-YYYY format could be confused with the MM-DD-YYYY format.
The YYYY-MM-DD format reduces this confusion.
2022-12-19 13:33:05 -08:00
Aliaksandr Valialkin
944effca54 lib/storage: do not check for the result returned by db.doExtDB() where this isn't necessary
This simplifies the code a bit
2022-12-19 13:23:13 -08:00
Aliaksandr Valialkin
6530344a8f docs/CHANGELOG.md: cut v1.85.2 2022-12-19 13:09:29 -08:00
Aliaksandr Valialkin
9a0308ab32 vendor: make vendor-update 2022-12-19 13:08:13 -08:00
Aliaksandr Valialkin
0bf3ae9559 lib/promscrape/discovery/consul: expose service tags in individual labels __meta_consul_tag_<tagname>
This simplifies copying service tags to target labels with the following relabeling rule:

- action: labelmap
  regex: __meta_consul_tag_(.+)

See https://stackoverflow.com/questions/44339461/relabeling-in-prometheus
2022-12-19 13:08:11 -08:00
Aliaksandr Valialkin
6c98b56935 lib/storage: search for TSIDs for the given metricIDs in the previous indexdb if they aren't found in the current indexdb
The issue triggers after the indexdb rotation for time series, which stop receiving new samples.
This results in missing data for such time series in query responses.

This commit should address the https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3502

The issue has been introduced in 2dd93449d8
2022-12-19 12:03:09 -08:00
Aliaksandr Valialkin
dc0b08efb0 lib/storage: optimize partSearch.searchBHS() for common case when the TSID for the current block header is bigger or equal to the current tsid
This should help improving performance at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
2022-12-19 10:28:03 -08:00
Aliaksandr Valialkin
057fb2120b lib/storage: properly set buf capacity inside marshalMetricID
Previously it was always set to 0. In theory this could result into incorrect marshaling
of metricIDs.

The issue has been introduced in 5e4dfe50c6
2022-12-19 10:14:38 -08:00
Aliaksandr Valialkin
4cb83f0f4a lib/logger: follow-up for 72f8fce107
- Document the change at docs/CHANELOG.md
- Log fatal errors if the -loggerJSONFields contains unexpected values
- Rename -loggerJsonFields to -loggerJSONFields for the sake of consistency naming commonly used in Go

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2348
2022-12-16 17:42:07 -08:00
Michal Kralik
72f8fce107 lib/logger: support for renaming json fields (#3488) 2022-12-16 17:26:32 -08:00
Aliaksandr Valialkin
56a9ea3753 docs/vmalert.md: mention latency_offset query arg, which has been added in 86dae56bd0
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3481
2022-12-16 17:20:37 -08:00
Aliaksandr Valialkin
285841bcce app/vmselect/prometheus: follow-up after 86dae56bd0
Return error if the provided latency_offset query arg cannot be parsed.
This should simplify debugging in production.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3481
2022-12-16 17:13:20 -08:00
Aliaksandr Valialkin
65f8fc527f lib/promscrape: stop dropping metric name if relabeling rules do not instruct to do this on the /metric-relabel-debug page 2022-12-16 17:02:41 -08:00
Roman Khavronenko
86dae56bd0 vmselect: support overriding of -search.latencyOffset (#3489)
support overriding of `-search.latencyOffset` value via
URL param `latency_offset`.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3481

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-16 16:54:57 -08:00
Michal Kralik
07e9322157 build: nightly builds at 2:48am (#3490) 2022-12-16 16:46:24 -08:00
Roman Khavronenko
e40c7d6efa dashboards: respect $job var in sub-vars for cluster dash (#3487)
Previously, $job_select, $job_storage and $job_insert
didn't respect the $job filter. This change updates
the variable queries to account for set $job variable.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-16 09:53:32 +01:00
Aliaksandr Valialkin
17a244571b docs/keyConcepts.md: update the list of supported data ingestion protocols
- Add DataDog protocol
- Remove native protocol, since it isn't intended for general-purpose usage by external clients
2022-12-15 12:01:59 -08:00
Aliaksandr Valialkin
ad8852759d lib/storage: skip missing tsids in the current block header by using binary search
This improves performance by up to 10x when big number of the requested TSIDs
are missing in the searched parts.

This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
2022-12-14 22:06:51 -08:00
Aliaksandr Valialkin
4de9d35458 lib/flagutil/bytes.go: properly handle values bigger than 2GiB on 32-bit architectures
This fixes handling of values bigger than 2GiB for the following command-line flags:

- -storage.minFreeDiskSpaceBytes
- -remoteWrite.maxDiskUsagePerURL
2022-12-14 19:26:31 -08:00
Aliaksandr Valialkin
5d30080555 lib/flagutil: support for TB and TiB suffixes for command-line flags, which accept byte sizes 2022-12-14 17:52:32 -08:00
Aliaksandr Valialkin
38341802c2 app/vmselect: add /expand-with-exprs page 2022-12-14 16:18:55 -08:00
Aliaksandr Valialkin
1896c47fee .wwhrd.yml: add ISC license, which is used by github.com/davecgh/go-spew
This license is compatible with Apache2

See https://github.com/davecgh/go-spew/blob/master/LICENSE
2022-12-14 14:55:46 -08:00
Aliaksandr Valialkin
231569f89b docs/vmagent.md: small formatting fix 2022-12-14 14:25:35 -08:00
Aliaksandr Valialkin
0c54ff20eb docs/CHANGELOG.md: fix the link to the issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3466 2022-12-14 14:17:17 -08:00
Aliaksandr Valialkin
76445bfd98 docs/CHANGELOG.md: add release date for v1.85.1 2022-12-14 14:12:07 -08:00
Aliaksandr Valialkin
09a70d3e90 vendor: make vendor-update 2022-12-14 12:13:54 -08:00
Aliaksandr Valialkin
ac5948b3f3 app/vmselect/vmui: make vmui-update 2022-12-14 12:01:48 -08:00
Aliaksandr Valialkin
ea44c39377 docs/CHANGELOG.md: cut v1.85.1 2022-12-14 11:57:49 -08:00
Aliaksandr Valialkin
e0009ec466 docs/Cluster-VictoriaMetrics.md: mention the vm_storage_is_read_only metric, which can help debugging readonly mode at vmstorage 2022-12-14 09:31:28 -08:00
Aliaksandr Valialkin
7a61bafe59 docs/vmagent.md: clarify that relabeling is actually a debugging at relabel debug section 2022-12-13 15:46:21 -08:00
Aliaksandr Valialkin
b89d862aa5 docs/CHANGELOG.md: document the bugfix at a50120a212 2022-12-13 09:36:24 -08:00
Zakhar Bessarab
a50120a212 lib/backup/azremote: fix copying for parts larger than 256M by using async copy (#3479)
* lib/backup/azremote: fix copying for parts larger than 256M by using async copy

* lib/backup/azremote: add description of an error for log message
2022-12-13 09:32:57 -08:00
Yury Molodov
d3418bafc0 fix: prevent run query when selecting autocomplete option (#3480) 2022-12-13 09:30:05 -08:00
Aliaksandr Valialkin
0d41d933e9 lib/mergeset: reduce the parts threshold before starting assisted merges
This should improve query speed in general case.

This is a follow-up for d1af6046c7
2022-12-13 09:13:49 -08:00
Aliaksandr Valialkin
33bff00890 app/vmselect/vmui: make vmui-update 2022-12-12 17:46:30 -08:00
Yury Molodov
cca3bc756b vmui: minor enhancements (#3471)
* update package-lock.json

* fix: correct handle click by "action" on cardinality page

* fix: correct styles for icons width

* feat: add layout with copyright

* feat: add website and issue to footer
2022-12-12 17:44:13 -08:00
Dima Lazerka
bde93844de Add Anomaly Detection to enterprise features list (#3476)
* Add Anomaly Detection to enterprise features list

* Update docs/enterprise.md

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-12-12 17:42:44 -08:00
Aliaksandr Valialkin
d1af6046c7 lib/{mergeset,storage}: do not block small merges by pending big merges - assist with small merges instead
Blocked small merges may result into big number of small parts, which, in turn,
may result in increased CPU and memory usage during queries, since queries need to inspect
all the existing small parts.

The issue has been introduced in 8189770c50

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337
2022-12-12 17:00:50 -08:00
Aliaksandr Valialkin
3b18931050 lib/bytesutil: cache results for all the input strings, which were passed during the last 5 minutes from FastStringMatcher.Match(), FastStringTransformer.Transform() and InternString()
Previously only up to 100K results were cached.
This could result in sub-optimal performance when more than 100K unique strings were actually used.
For example, when the relabeling rule was applied to a million of unique Graphite metric names
like in the https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3466

This commit should reduce the long-term CPU usage for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3466
after all the unique Graphite metrics are registered in the FastStringMatcher.Transform() cache.

It is expected that the number of unique strings, which are passed to FastStringMatcher.Match(),
FastStringTransformer.Transform() and to InternString() during the last 5 minutes,
is limited, so the function results fit memory. Otherwise OOM crash can occur.
This should be the case for typical production workloads.
2022-12-12 14:41:13 -08:00
Zakhar Bessarab
ebcb4ab617 {app/{vmbackup/vmrestore}: update path example to use Azure terminology for consistency (#3475) 2022-12-12 22:17:22 +03:00
Roman Khavronenko
8286f9608f vmalert: support $for or .For template variables (#3474)
support `$for` or `.For` template variables  in alert's annotations.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3246

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-12 22:16:10 +03:00
Roman Khavronenko
eb275be99d dashboards: add VersionChange annotation (#3473)
The new annotation is hidden by default and suppose to show
component `short_version` label change on the panels.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-12 16:32:26 +01:00
Aliaksandr Valialkin
7ae744fce6 lib/protoparser/datadog: do not re-use previously parsed field values if they are missing in the currently parsed message
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3432
2022-12-11 13:09:25 -08:00
Aliaksandr Valialkin
4a4b3c2462 vendor: update github.com/klauspost/compress from v1.15.12 to v1.15.13 2022-12-11 02:10:51 -08:00
Aliaksandr Valialkin
9f642d10ff docs/CHANGELOG.md: cut v1.85.0 2022-12-11 02:01:11 -08:00
Aliaksandr Valialkin
38f8e8adc3 docs/CHANGELOG.md: document changes at v1.79.6 2022-12-11 01:51:36 -08:00
Aliaksandr Valialkin
d141cb28a6 docs/CHANGELOG.md: document 461158a437
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3427
2022-12-10 23:42:16 -08:00
Aliaksandr Valialkin
88597f187b app/{vmagent,vminsert}/datadog: make the host label optional in DataDog data ingestion protocol
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3432
2022-12-10 23:32:31 -08:00
Aliaksandr Valialkin
e272a0ec78 app/vmselect/promql: allow passing inf arg into functions, which accept numeric limit on the number of output time series
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3461
2022-12-10 22:47:47 -08:00
Aliaksandr Valialkin
b7aec1be4d docs/CHANGELOG.md: add a link to the issue related to reduced CPU and memory usage at vmalert
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3464

The related commit - b97bd01605
2022-12-10 22:21:19 -08:00
Aliaksandr Valialkin
19f20c0f4e vendor: make vendor-update 2022-12-10 21:46:16 -08:00
Aliaksandr Valialkin
b01607e3fb docs: clarify that single-node VictoriaMetrics also provides functionality for relabel debugging
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3407
2022-12-10 20:49:52 -08:00
Aliaksandr Valialkin
a30ae502ef lib/promscrape: allow editing relabeling configs and labels at /target-relabel-debug page
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3407
2022-12-10 12:44:45 -08:00
Aliaksandr Valialkin
3e7276639e app/{vminsert,vmselect}: move the handler for /metric-relabel-debug from vminsert to vmselect to be consistent with the cluster version 2022-12-10 02:45:40 -08:00
Aliaksandr Valialkin
3f4cb9a142 docs: sync with cluster branch after 97b41e727c 2022-12-10 02:32:24 -08:00
Aliaksandr Valialkin
a8b8e23d68 lib/promscrape: implement target-level and metric-level relabel debugging
Target-level debugging is performed by clicking the 'debug' link at the corresponding target
on either http://vmagent:8429/targets page or on http://vmagent:8428/service-discovery page.

Metric-level debugging is perfromed at http://vmagent:8429/metric-relabel-debug page.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3407

See https://docs.victoriametrics.com/vmagent.html#relabel-debug
2022-12-10 02:09:44 -08:00
Aliaksandr Valialkin
6f0179405a app/vmalert: properly handle nil req passed to requestToCurl()
This fixes a panic in the TestAlertingRule_Exec_Negative test.
The panic has been introduced in the commit b97bd01605
2022-12-10 02:04:17 -08:00
Aliaksandr Valialkin
c5dd973f9c docs/url-examples.md: add missing whitespace for proper heading for the example on how to send data via OpenTSDB protocol 2022-12-09 17:33:44 -08:00
Aliaksandr Valialkin
765ee5f7ba docs/CHANGELOG.md: document b97bd01605 2022-12-09 11:49:29 -08:00
Aliaksandr Valialkin
ca59d3de59 app/vmalert: do not show system links at http://vmalert:8880/ page when it is requested via proxy
The system links are absolute, e.g. they start from `/`, so there are high chances
they won't work as expected when requested via proxy such as vmselect with -vmalert.proxyURL
command-line flag.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3424
2022-12-09 11:46:28 -08:00
Roman Khavronenko
b97bd01605 vmalert: do not hold pointer to http.Request (#3467)
http.Request was used as a part of state struct
for generating the curl command when viewing the rule's
state changes.
It appears, that holding a referencing is far more expensive
than generating the curl command immediately.
On the test with 40k rules, this change reduces memory
and CPU usage by 50%.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-09 18:13:29 +03:00
Aliaksandr Valialkin
2406c0dcfd docs/CHANGELOG.md: document the bugfix at 05b42601c3
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3247
2022-12-08 18:35:28 -08:00
Zakhar Bessarab
05b42601c3 lib/promscrape/discovery/azure: remove API server from URL returned by azure (#3403)
* lib/promscrape/discovery/azure: remove API server from URL returned by azure

* lib/promscrape/discovery/azure: validate nextLink contains same URL as apiServer
2022-12-08 18:29:10 -08:00
Aliaksandr Valialkin
8434aa142d lib/querytracer: fix remaining tests after 49ebc48809 2022-12-08 18:18:06 -08:00
Aliaksandr Valialkin
5b9e6b9d24 lib/storage: follow-up after 7c0ae3a86a
- Update docs at https://docs.victoriametrics.com/#deduplication
- Optimize the deduplication loop a bit

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333
2022-12-08 18:16:57 -08:00
Roman Khavronenko
7c0ae3a86a lib/storage: keep sample with the biggest value on timestamp conflict (#3421)
The change leaves raw sample with the biggest value for identical
timestamps per each `-dedup.minScrapeInterval` discrete interval
when the deduplication is enabled.

```
benchstat old.txt new.txt
name                                         old time/op    new time/op    delta
DeduplicateSamples/minScrapeInterval=1s-10      817ns ± 2%     832ns ± 3%      ~     (p=0.052 n=10+10)
DeduplicateSamples/minScrapeInterval=2s-10     1.56µs ± 1%    2.12µs ± 0%   +35.19%  (p=0.000 n=9+7)
DeduplicateSamples/minScrapeInterval=5s-10     1.32µs ± 3%    1.65µs ± 2%   +25.57%  (p=0.000 n=10+10)
DeduplicateSamples/minScrapeInterval=10s-10    1.13µs ± 2%    1.50µs ± 1%   +32.85%  (p=0.000 n=10+10)

name                                         old speed      new speed      delta
DeduplicateSamples/minScrapeInterval=1s-10   10.0GB/s ± 2%   9.9GB/s ± 3%      ~     (p=0.052 n=10+10)
DeduplicateSamples/minScrapeInterval=2s-10   5.24GB/s ± 1%  3.87GB/s ± 0%   -26.03%  (p=0.000 n=9+7)
DeduplicateSamples/minScrapeInterval=5s-10   6.22GB/s ± 3%  4.96GB/s ± 2%   -20.37%  (p=0.000 n=10+10)
DeduplicateSamples/minScrapeInterval=10s-10  7.28GB/s ± 2%  5.48GB/s ± 1%   -24.74%  (p=0.000 n=10+10)
```

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-08 18:06:11 -08:00
Aliaksandr Valialkin
3019ec3da6 lib/querytracer: fix tests after 49ebc48809 2022-12-08 17:21:38 -08:00
Aliaksandr Valialkin
eeacbaf0b6 all: update Go builder from v1.19.3 to v1.19.4
See https://github.com/golang/go/issues?q=milestone%3AGo1.19.4+label%3ACherryPickApproved
2022-12-08 16:41:24 -08:00
Aliaksandr Valialkin
56b8980915 lib/promscrape: allow using sample_limit and series_limit options in stream parsing mode
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3458
2022-12-08 16:33:38 -08:00
Aliaksandr Valialkin
f9730676d9 app/vmselect/searchutils: do not print flag name responsible for query timeout if the timeout isn't reached
This should make the log message more clear
2022-12-08 13:07:33 -08:00
Aliaksandr Valialkin
bce1c5d572 docs/Cluster-VictoriaMetrics.md: typo fix 2022-12-07 12:26:34 -08:00
Aliaksandr Valialkin
189217a069 docs/CHANGELOG.md: document the addition of file-based discovery of vmstorage nodes 2022-12-07 12:04:57 -08:00
Aliaksandr Valialkin
59430e4274 docs/Cluster-VictoriaMetrics.md: make docs-sync after 5de8330ce00adfc5ac794070d30a2617ddc14bf2 2022-12-07 12:02:38 -08:00
Aliaksandr Valialkin
49ebc48809 lib/querytracer: put the version of VictoriaMetrics in the first message of query trace
This should simplify further debugging, since the first thing to start the debugging by query trace
is to know the version of VictoriaMetrics, which produced this trace.
2022-12-07 09:46:39 -08:00
Roman Khavronenko
0b6b6d52bf dashboards: remove DataLinks from single version (#3456)
Those data links were copy&paste artifact from cluster version
and aren't needed on the dash.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-07 14:35:52 +01:00
Roman Khavronenko
9f1403db38 dashboards: add non-default flags panel for vmagent (#3453)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-07 12:22:20 +01:00
Roman Khavronenko
b9dc11612e alerts: remove show_at label for RequestErrorsToAPI alert (#3455)
Alert `RequestErrorsToAPI` could be permanently triggered due to
mistakes in clients configuration. However, such requests are unlikely
to cause VM health state change. So there is no need in displaying
this alert because there will be no correlation caused by it.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-07 14:19:50 +03:00
Denys Holius
f12dae130a deployment/docker: bump Grafana version to v9.2.7 (#3454)
see https://grafana.com/blog/2022/11/29/grafana-security-release-new-versions-with-high-severity-security-fix-for-cve-2022-31097/
2022-12-07 10:23:19 +01:00
Aliaksandr Valialkin
6183975d45 .github/ISSUE_TEMPLATE/bug_report.md: update the link to troubleshooting docs 2022-12-06 21:11:15 -08:00
Aliaksandr Valialkin
3f82e3fa36 docs: follow-up after e1bf2a85d0559d112908ce81597f3261d3a085c0
- Document the change at docs/CHANGELOG.md
- Run `make docs-sync` for copying app/vmgateway/README.md to docs/vmgateway.md
  in order to propagate docs' changes to https://docs.victoriametrics.com/vmgateway.html
2022-12-06 21:05:22 -08:00
Aliaksandr Valialkin
758e8a15fd app/vmselect: typo fixes in code comments 2022-12-06 20:58:16 -08:00
Aliaksandr Valialkin
35a3170d97 docs: document the addition of -storageNode.discoveryInterval command-line flag in VictoriaMetrics cluster enterprise
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3417
2022-12-06 19:57:06 -08:00
Aliaksandr Valialkin
5cc19b1f7e docs/Articles.md: change the link to How we tried using VictoriaMetrics and Thanos at the same time from Russian to English article 2022-12-06 16:30:10 -08:00
Roman Khavronenko
3dec847c93 vmalert: correctly return error for RW failures (#3452)
* vmalert: correctly return error for RW failures

By mistake, in 0989649ad0 the error
for remote write failures weren't return to user.
This change fixes it.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-06 15:36:46 +01:00
Aliaksandr Valialkin
9a7c36e645 docs/Articles.md: add a link to the talk "How do We Keep Metrics for a Long Time in VictoriaMetrics" 2022-12-06 00:55:38 -08:00
Aliaksandr Valialkin
50ea632bfe docs/Articles.md: link to a video for the talk 'VictoriaMetrics: scaling to 100 million metrics per second' 2022-12-06 00:53:05 -08:00
Aliaksandr Valialkin
06758650bf vendor: make vendor-update 2022-12-05 23:28:14 -08:00
Aliaksandr Valialkin
a40c50f4fe docs/CHANGELOG.md: document 1e0666abb4 2022-12-05 23:10:17 -08:00
Aliaksandr Valialkin
e2e341da9f app/vmselect/vmui: make vmui-update after 7645d9ae00 2022-12-05 23:07:08 -08:00
Pedro Gonçalves
1e0666abb4 Datadog - Add device as a tag if it's present as a field in the series object (#3431)
* Datadog - Add device as a tag if it's present as a field in the series object

* address PR comments
2022-12-05 23:06:03 -08:00
Aliaksandr Valialkin
caa1c43166 docs: follow-up for 7645d9ae00
- Document the change at docs/CHANGELOG.md
- Document the feature at https://docs.victoriametrics.com/#vmui

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3446
2022-12-05 22:50:41 -08:00
Yury Molodov
7645d9ae00 feat: add toggle query display by Ctrl (#3449) 2022-12-05 22:45:15 -08:00
Yury Molodov
01a9b36a95 vmui: timezone select (#3414)
* feat: add timezone selection

* vmui: provide feature timezone select

* fix: correct timezone with relative time

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-12-05 22:44:31 -08:00
Roman Khavronenko
71f0bbbe39 deployment: update the README (#3447)
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-05 22:05:31 -08:00
Aliaksandr Valialkin
718d1d90b6 docs/CHANGELOG.md: document fd43b5bad0
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3444
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3445
2022-12-05 22:01:42 -08:00
Yury Molodov
fd43b5bad0 vmui: fix multi-line query (#3448)
* fix: remove prevent nav by up/down keys for multi-line query

* fix: add query params encode in URL
2022-12-05 21:56:54 -08:00
Aliaksandr Valialkin
5eae9a9914 app/vmselect/promql: add range_trim_spikes(phi, q) function for trimming phi percent of largest spikes per each time series returned by q 2022-12-05 21:55:01 -08:00
Aliaksandr Valialkin
d99d222f0a lib/{storage,mergeset}: log the duration for flushing in-memory parts on graceful shutdown 2022-12-05 21:30:48 -08:00
Aliaksandr Valialkin
eed32b368c docs/vmctl.md: make docs-sync after 86c31f2955
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2930
2022-12-05 17:24:10 -08:00
Zakhar Bessarab
86c31f2955 app/vmctl: add option to migrate between clusters with automatic tenants discovery (#3450) 2022-12-05 17:18:09 -08:00
Aliaksandr Valialkin
f3e84b4dea {dashboards,alerts}: subtitute {type="indexdb"} with {type=~"indexdb.*"} inside queries after 8189770c50
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337
2022-12-05 16:00:22 -08:00
Aliaksandr Valialkin
8189770c50 all: add -inmemoryDataFlushInterval command-line flag for controlling the frequency of saving in-memory data to disk
The main purpose of this command-line flag is to increase the lifetime of low-end flash storage
with the limited number of write operations it can perform. Such flash storage is usually
installed on Raspberry PI or similar appliances.

For example, `-inmemoryDataFlushInterval=1h` reduces the frequency of disk write operations
to up to once per hour if the ingested one-hour worth of data fits the limit for in-memory data.

The in-memory data is searchable in the same way as the data stored on disk.
VictoriaMetrics automatically flushes the in-memory data to disk on graceful shutdown via SIGINT signal.
The in-memory data is lost on unclean shutdown (hardware power loss, OOM crash, SIGKILL).

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337
2022-12-05 15:16:14 -08:00
Aliaksandr Valialkin
e509552e92 vendor: make vendor-update 2022-12-05 01:01:57 -08:00
Yury Molodov
461158a437 fix: add word-break for tooltip (#3437) 2022-12-05 08:50:34 +01:00
Roman Khavronenko
6801b37e53 dashboards: add Disk space usage % and Disk space usage % by type panels (#3436)
The new panels have been added to the vmstorage and drilldown rows.

`Disk space usage %` is supposed to show disk space usage percentage.
This panel is now also referred by `DiskRunsOutOfSpace` alerting rule.
This panel has Drilldown option to show absolute values.

`Disk space usage % by type` shows the relation between datapoints
and indexdb size. It supposed to help identify cases when indexdb
starts to take too much disk space.
This panel has Drilldown option to show absolute values.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-05 08:35:33 +01:00
Roman Khavronenko
91a8afa172 vmalert: reduce allocations for Prometheus resp parse (#3435)
Method `metrics()` now pre-allocates slices for labels
and results from query responses. This reduces the number 
of allocations on the hot path for instant requests.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-05 08:34:54 +01:00
Aliaksandr Valialkin
544ea89f91 lib/{mergeset,storage}: add start background workers via startBackgroundWorkers() function 2022-12-04 00:01:04 -08:00
Aliaksandr Valialkin
33dda2809b lib/mergeset: panic when too long item is passed to Table.AddItems() 2022-12-03 23:32:16 -08:00
Aliaksandr Valialkin
932c1f90ae lib/storage: remove duplicate logging for filepath on errors 2022-12-03 23:15:22 -08:00
Aliaksandr Valialkin
044a304adb lib/storage: pass a single arg - rowsPerBlock - to getCompressLevel() function instead of two args 2022-12-03 23:10:16 -08:00
Aliaksandr Valialkin
cb44976716 lib/{storage,mergeset}: use a single sync.WaitGroup for all background workers
This simplifies the code
2022-12-03 23:03:08 -08:00
Aliaksandr Valialkin
28e6d9e1ff lib/storage: properly pass retentionMsecs to OpenStorage() at TestIndexDBRepopulateAfterRotation 2022-12-03 23:02:10 -08:00
Aliaksandr Valialkin
343c69fc15 lib/{mergeset,storage}: pass compressLevel to blockStreamWriter.InitFromInmemoryPart
This allows packing in-memory blocks with different compression levels
depending on its contents. This may save memory usage.
2022-12-03 22:46:48 -08:00
Aliaksandr Valialkin
6d87462f4b lib/mergeset: use the given compressLevel for index and metaindex compression in in-memory part
Previously only data was compressed with the given compressLevel
2022-12-03 22:34:54 -08:00
Aliaksandr Valialkin
f3e3a3daeb lib/{mergeset,storage}: take into account byte slice capacity when returning the size of in-memory part
This results in more correct reporting of memory usage for in-memory parts
2022-12-03 22:30:36 -08:00
Aliaksandr Valialkin
c4150995ad lib/mergeset: reduce the time needed for the slowest tests 2022-12-03 22:26:33 -08:00
Aliaksandr Valialkin
45299efe22 lib/{storage,mergeset}: consistency rename: `flushRaw{Rows,Items} -> flushPending{Rows,Items} 2022-12-03 22:17:46 -08:00
Aliaksandr Valialkin
5ca58cc4fb lib/storage: optimization: do not scan block for rows outside retention if it is covered by the retention 2022-12-03 22:14:12 -08:00
Aliaksandr Valialkin
152ac564ab lib/storage: remove logging redundant path values in a single error message 2022-12-03 22:13:13 -08:00
Aliaksandr Valialkin
93764746c2 lib/filestream: remove logging redundant path values in a single error message 2022-12-03 22:01:51 -08:00
Aliaksandr Valialkin
4f28513b1a lib/fs: remove logging redundant path values in a single error message 2022-12-03 22:00:20 -08:00
Aliaksandr Valialkin
7c3c08d102 lib/backup: remove logging duplicate path values in a single error message 2022-12-03 21:55:06 -08:00
Aliaksandr Valialkin
14660d4df5 all: typo fix: the the -> the 2022-12-03 21:53:01 -08:00
Aliaksandr Valialkin
ddc3d6b5c3 lib/mergeset: drop the crufty code responsible for direct upgrade from releases prior v1.28.0
Upgrade to v1.84.0, wait until the "finished round 2 of background conversion" message
appears in the log and then upgrade to newer release.
2022-12-03 21:17:31 -08:00
Aliaksandr Valialkin
05c65bd83f lib/storage: speed up search for data block for the given tsids
Use binary search instead of linear scan for looking up the needed
data block inside index block.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
2022-12-03 20:58:32 -08:00
Aliaksandr Valialkin
c1cd4a9101 docs/CHANGELOG.md: consistently add - prefix in front of command-line flags
This is a follow-up for bcba5d2a78
2022-12-02 19:08:26 -08:00
Aliaksandr Valialkin
b6712ac08e docs: follow-up after 30fea30685
- Run `make docs-sync`, so app/vmalert/README.md is copied to docs/vmalert.md
- Clarify the feature description in the docs/CHANGELOG.md

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3408
2022-12-02 19:03:11 -08:00
Aliaksandr Valialkin
299285b147 lib/storage: fix TestUpdateCurrHourMetricIDs test when it runs on the first hour of the day by UTC 2022-12-02 18:52:37 -08:00
Aliaksandr Valialkin
e9636b4c69 lib/{mergeset,storage}: re-use the code for removing isInMerge flag at parts
Move the common code into releasePartsToMerge() method and consistently use it throughout the code.
2022-12-02 18:52:37 -08:00
Denys Holius
54741f6f38 docs/Articles.md: fir broken link (#3433) 2022-12-02 10:35:40 +01:00
Roman Khavronenko
cd5c451ea3 docs: fix typo in cluster's README (#3430)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-01 16:24:16 +01:00
Zakhar Bessarab
cd2ac07195 make: make openssl output parsing symbol number agnostic (#3429) 2022-12-01 16:09:36 +01:00
Roman Khavronenko
bcba5d2a78 vmalert: fix replay step param (#3428)
The recent change in modifying default value
of `datasource.queryStep` flag resulted in situation
where replay mode was always running queries with
step=`datasource.queryStep`. When it should always
use rule's evaluation interval.

The fix is related not to replay mode only, but
for all Range requests. Now step param is set
individually for each mode.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-01 13:57:53 +01:00
Roman Khavronenko
f989c20dd7 dashboards: fix typo in data link (#3426)
Fixes a missing `&` char in data link for ETA panel
on cluster dashboards. Without `&` char it generates
wrong link when click on Drilldown menu.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-01 13:21:14 +01:00
Zakhar Bessarab
30fea30685 app/vmalert: add remoteWrite.sendTimeout command-line flag to configure timeout for sending data to remoteWrite.url (#3423)
* app/vmalert: add `remoteWrite.sendTimeout` command-line flag to configure timeout for sending data to `remoteWrite.url`

* vmalert: remove WriteTimeout from clients Cfg
No need to have it as a part of configuration struct:
* the client isn't used by other packages;
* there are no internal tests to check the WriteTimeout.

* vmalert: remove DisablePathAppend from clients Cfg
No need to have it as a part of configuration struct:
* the client isn't used by other packages;
* there are no internal tests to check the DisablePathAppend.

Co-authored-by: hagen1778 <roman@victoriametrics.com>
2022-12-01 09:57:19 +01:00
Roman Khavronenko
8cc4f7eac6 vmalert: properly pass headers during the restore procedure (#3420)
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3418

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-01 09:27:39 +01:00
Aliaksandr Valialkin
3cdff3de23 docs/vmagent.md: update after 959f06d175 2022-11-29 21:33:49 -08:00
Aliaksandr Valialkin
f325410c26 lib/promscrape: optimize service discovery speed
- Return meta-labels for the discovered targets via promutils.Labels
  instead of map[string]string. This improves the speed of generating
  meta-labels for discovered targets by up to 5x.

- Remove memory allocations in hot paths during ScrapeWork generation.
  The ScrapeWork contains scrape settings for a single discovered target.
  This improves the service discovery speed by up to 2x.
2022-11-29 21:26:00 -08:00
Aliaksandr Valialkin
c7ce4979ec all: follow-up after 05cf8a6ecc 2022-11-29 21:03:59 -08:00
Aliaksandr Valialkin
4822406b64 app/vmalert: substitute -datasource.disablePathAppend with -remoteRead.disablePathAppend in the description for -datasource.url command-line flag
This is a follow-up for 959f06d175
2022-11-29 20:36:41 -08:00
Aliaksandr Valialkin
295c84df66 lib/promscrape/discovery: add a benchmark for measuring the performance of creating pod meta-labels 2022-11-29 20:27:48 -08:00
Dmytro Kozlov
05cf8a6ecc vmctl: support of the remote read protocol (#3232)
vmctl: support of the remote read protocol

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
2022-11-29 22:53:28 +01:00
Roman Khavronenko
bdd0683c4a dashboards: update VM single dash (#3400)
The change list is the following:
* bump Grafana version to 9.2.6;
* replace old "Graph" panel with "TimeSeries" panel;
* show % usage of Mem and CPU additionally to of absolute values;
* `Caches` row was removed. All needed info for caches is now part of `Troubleshooting`;
* add Annotations for Alert triggers. Not all alerts are supposed to be displayed
on the dashboard, but only those with label `show_at: dashboard`.
See `alerts.yml` change.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-11-29 19:28:22 +01:00
Roman Khavronenko
5d835a6d64 dashboards: update vmalert dash (#3404)
The change list is the following:
* bump Grafana version to 9.2.6;
* replace old Graph panel with TimeSeries panel;
* add RemoteWrite section;
* allow configuring topK elements for some of the panels;
* Preer grouping by job instead of grouping by instance.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-11-29 19:26:31 +01:00
Max Golionko
959f06d175 vmalert: flag reference update (#3415)
* flag reference update

there is no flag `-datasource.disablePathAppend` and datasource actually checking for `-remoteRead.disablePathAppend`

* update source for doc as well
2022-11-29 19:22:57 +01:00
Roman Khavronenko
7dfb01bd7b dashboards: update vmagent dash (#3411)
The change list is the following:
* bump Grafana version to 9.2.6;
* add version change annotations;
* switch to per-job panels instead of per-instance;
* add drilldown option for resource usage panels.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-11-29 19:22:13 +01:00
Zakhar Bessarab
2f837c1b23 doc/operator: update formatting for backup section, add FAQ section (#3416)
* doc/operator: update formatting for backup section, add FAQ section

* doc/operator: address review feedback

* doc/operator: add note about difference between `VMRestore` and `VMBackupmanager` as init containers

* Update docs/operator/backups.MD

Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>

Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
2022-11-29 19:19:46 +01:00
Aliaksandr Valialkin
0002de937b docs/CHANGELOG.md: document 027ab74efb
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3402
2022-11-28 18:55:01 -08:00
Aliaksandr Valialkin
5c906beea2 docs/url-examples.md: allow linking to how to send OpenTSDB/Graphite data chapters 2022-11-28 18:42:49 -08:00
Aliaksandr Valialkin
654e94f420 lib/promscrape: add exported_ prefix to metric names exported by scrape targets if they clash with automatically generated metrics
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3406
2022-11-28 18:37:09 -08:00
匠心零度
fa0ce10275 lib/storage: remove extra error check (#3396) 2022-11-28 16:43:31 -08:00
Aliaksandr Valialkin
090343ff50 docs/Articles.md: add a link to slides about scaling to 100 million metrics per second 2022-11-28 16:41:14 -08:00
Roman Khavronenko
31ff26065b dashboards: update VM cluster dash (#3401)
The change list is the following:
* bump Grafana version to 9.2.6;
* remove artifacts in data links.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-11-28 14:13:00 +01:00
Denys Holius
4b3479c003 deployment/docker: bump grafana version to latest v9.2.6 (#3398) 2022-11-28 10:31:10 +01:00
Timur Bakeyev
9ad578214e Update datasource entries consistently contain type prometheus and uid $ds. (#3393)
Co-authored-by: Timour I. Bakeev <tbakeev@ripe.net>
2022-11-28 08:37:39 +01:00
Aliaksandr Valialkin
fa308ae9f8 deployment/docker: update VictoriaMetrics tag from v1.83.1 to v1.84.0 2022-11-25 22:19:29 -08:00
1604 changed files with 253043 additions and 43800 deletions

View File

@@ -1,46 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
It would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
and verify whether the bug is reproducible there.
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/#troubleshooting).
**To Reproduce**
Steps to reproduce the behavior.
**Expected behavior**
A clear and concise description of what you expected to happen.
**Logs**
Check if any warnings or errors were logged by VictoriaMetrics components
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
**Screenshots**
If applicable, add screenshots to help explain your problem.
For VictoriaMetrics health-state issues please provide full-length screenshots
of Grafana dashboards if possible:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
See how to setup monitoring here:
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
**Version**
The line returned when passing `--version` command line flag to the binary. For example:
```
$ ./victoria-metrics-prod --version
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
```
**Used command-line flags**
Please provide the command-line flags used for running VictoriaMetrics and its components.

86
.github/ISSUE_TEMPLATE/bug_report.yml vendored Normal file
View File

@@ -0,0 +1,86 @@
name: Bug report
description: Create a report to help us improve
labels: [bug]
body:
- type: markdown
attributes:
value: |
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
and verify whether the bug is reproducible there.
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
- type: textarea
id: describe-the-bug
attributes:
label: Describe the bug
description: |
A clear and concise description of what the bug is.
placeholder: |
When I do `A` VictoriaMetrics does `B`. I expect it to do `C`.
validations:
required: true
- type: textarea
id: to-reproduce
attributes:
label: To Reproduce
description: |
Steps to reproduce the behavior.
If reproducing an issue requires some specific configuration file, please paste it here.
placeholder: |
Steps to reproduce the behavior.
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: |
The line returned when passing `--version` command line flag to the binary. For example:
```
$ ./victoria-metrics-prod --version
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
```
validations:
required: true
- type: textarea
id: logs
attributes:
label: Logs
description: |
Check if any warnings or errors were logged by VictoriaMetrics components
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
validations:
required: false
- type: textarea
id: screenshots
attributes:
label: Screenshots
description: |
If applicable, add screenshots to help explain your problem.
For VictoriaMetrics health-state issues please provide full-length screenshots
of Grafana dashboards if possible:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
See how to setup monitoring here:
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
validations:
required: false
- type: textarea
id: flags
attributes:
label: Used command-line flags
description: |
Please provide the command-line flags used for running VictoriaMetrics and its components.
validations:
required: false
- type: textarea
id: additional-info
attributes:
label: Additional information
placeholder: |
Additional information that doesn't fit elsewhere
validations:
required: false

View File

@@ -0,0 +1,5 @@
blank_issues_enabled: true
contact_links:
- name: Ask on Slack
url: https://slack.victoriametrics.com/
about: You can ask for help here!

View File

@@ -1,20 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@@ -0,0 +1,43 @@
name: Feature request
description: Suggest an idea for this project
labels: [enhancement]
body:
- type: textarea
id: describe-the-problem
attributes:
label: Is your feature request related to a problem? Please describe
description: |
A clear and concise description of what the problem is.
placeholder: |
Ex. I'm always frustrated when [...]
validations:
required: false
- type: textarea
id: describe-the-solution
attributes:
label: Describe the solution you'd like
description: |
A clear and concise description of what you want to happen.
validations:
required: true
- type: textarea
id: alternative-solutions
attributes:
label: Describe alternatives you've considered
description: |
A clear and concise description of any alternative solutions or features you've considered.
placeholder: |
I have tried to do `A`, but that doesn't solve a problem completely.
I have tried to do `A` and `B`, but implementing this would be better.
validations:
required: false
- type: textarea
id: feature-additional-info
attributes:
label: Additional information
description: |
Additional information which you consider helpful for implementing this feature.
placeholder: |
Add any other context or screenshots about the feature request here.
validations:
required: false

View File

@@ -17,7 +17,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.19.3
go-version: 1.19.5
id: go
- name: Code checkout
uses: actions/checkout@master

View File

@@ -0,0 +1,42 @@
name: "CodeQL - JS"
on:
push:
branches: [master, cluster]
paths:
- "**.js"
pull_request:
# The branches below must be a subset of the branches above
branches: [master, cluster]
paths:
- "**.js"
schedule:
- cron: "30 18 * * 2"
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ["javascript"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "javascript"

View File

@@ -13,12 +13,20 @@ name: "CodeQL"
on:
push:
branches: [ master, cluster ]
branches: [master, cluster]
paths-ignore:
- "**.md"
- "**.txt"
- "**.js"
pull_request:
# The branches below must be a subset of the branches above
branches: [ master, cluster ]
branches: [master, cluster]
paths-ignore:
- "**.md"
- "**.txt"
- "**.js"
schedule:
- cron: '30 18 * * 2'
- cron: "30 18 * * 2"
jobs:
analyze:
@@ -32,45 +40,47 @@ jobs:
strategy:
fail-fast: false
matrix:
language: [ 'go', 'javascript' ]
language: ["go"]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.19
if: ${{ matrix.language == 'go' }}
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.19.5
check-latest: true
cache: true
if: ${{ matrix.language == 'go' }}
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

66
.github/workflows/main-test.yml vendored Normal file
View File

@@ -0,0 +1,66 @@
name: main - test
on:
push:
branches:
- master
- cluster
paths-ignore:
- "docs/**"
- "**.md"
pull_request:
branches:
- master
- cluster
paths-ignore:
- "docs/**"
- "**.md"
permissions:
contents: read
jobs:
lint:
name: lint
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: 1.19.5
check-latest: true
cache: true
- name: Dependencies
run: |
make install-golangci-lint
make check-all
git diff --exit-code
test:
needs: lint
strategy:
matrix:
scenario: ["test-full", "test-pure", "test-full-386"]
name: test
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: 1.19.5
check-latest: true
cache: true
- name: run tests
run: |
make ${{ matrix.scenario}}
- name: Publish coverage
uses: codecov/codecov-action@v3
with:
file: ./coverage.txt

View File

@@ -1,13 +1,10 @@
name: main
on:
push:
paths-ignore:
- 'docs/**'
- '**.md'
pull_request:
paths-ignore:
- 'docs/**'
- '**.md'
workflow_run:
workflows: ["main - test"]
types:
- completed
permissions:
contents: read
@@ -16,30 +13,19 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.19.3
id: go
- name: Code checkout
uses: actions/checkout@master
- name: Dependencies
run: |
make install-golint
make install-errcheck
make install-golangci-lint
uses: actions/checkout@v3
with:
ref: ${{ github.event.workflow_run.head_branch }}
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: 1.19.5
check-latest: true
cache: true
- name: Build
run: |
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
make check-all
git diff --exit-code
make test-full
make test-pure
make test-full-386
make victoria-metrics-crossbuild
make vmuitils-crossbuild
- name: Publish coverage
uses: codecov/codecov-action@v3
with:
file: ./coverage.txt

39
.github/workflows/nightly-build.yml vendored Normal file
View File

@@ -0,0 +1,39 @@
name: nightly-build
on:
schedule:
# Daily at 2:48am
- cron: '48 2 * * *'
permissions:
contents: read
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
-
name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.19.5
id: go
-
name: Setup docker scan
run: |
mkdir -p ~/.docker/cli-plugins && \
curl https://github.com/docker/scan-cli-plugin/releases/latest/download/docker-scan_linux_amd64 -L -s -S -o ~/.docker/cli-plugins/docker-scan &&\
chmod +x ~/.docker/cli-plugins/docker-scan
-
name: Code checkout
uses: actions/checkout@master
-
name: Publish
run: |
LATEST_TAG=nightly PKG_TAG=nightly make publish

15
.golangci.yml Normal file
View File

@@ -0,0 +1,15 @@
run:
timeout: 2m
enable:
- revive
issues:
exclude-rules:
- linters:
- staticcheck
text: "SA(4003|1019|5011):"
linters-settings:
errcheck:
exclude: ./errcheck_excludes.txt

View File

@@ -3,3 +3,4 @@ allowlist:
- MIT
- BSD-3-Clause
- BSD-2-Clause
- ISC

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2022 VictoriaMetrics, Inc.
Copyright 2019-2023 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,8 @@ PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
LATEST_TAG ?= latest
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
ifeq ($(PKG_TAG),)
@@ -143,6 +144,7 @@ vmutils-windows-amd64: \
vmctl-windows-amd64
victoria-metrics-crossbuild: \
victoria-metrics-linux-386 \
victoria-metrics-linux-amd64 \
victoria-metrics-linux-arm64 \
victoria-metrics-linux-arm \
@@ -154,6 +156,7 @@ victoria-metrics-crossbuild: \
victoria-metrics-openbsd-amd64
vmutils-crossbuild: \
vmutils-linux-386 \
vmutils-linux-amd64 \
vmutils-linux-arm64 \
vmutils-linux-arm \
@@ -166,16 +169,17 @@ vmutils-crossbuild: \
vmutils-windows-amd64
publish-release:
git checkout $(TAG) && $(MAKE) release publish && \
git checkout $(TAG)-cluster && $(MAKE) release publish && \
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
release: \
release-victoria-metrics \
release-vmutils
release-victoria-metrics: \
release-victoria-metrics-linux-386 \
release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \
@@ -184,6 +188,10 @@ release-victoria-metrics: \
release-victoria-metrics-freebsd-amd64 \
release-victoria-metrics-openbsd-amd64
# adds i386 arch
release-victoria-metrics-linux-386:
GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-amd64:
GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -215,6 +223,7 @@ release-victoria-metrics-goos-goarch: victoria-metrics-$(GOOS)-$(GOARCH)-prod
cd bin && rm -rf victoria-metrics-$(GOOS)-$(GOARCH)-prod
release-vmutils: \
release-vmutils-linux-386 \
release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \
release-vmutils-linux-arm \
@@ -224,6 +233,9 @@ release-vmutils: \
release-vmutils-openbsd-amd64 \
release-vmutils-windows-amd64
release-vmutils-linux-386:
GOOS=linux GOARCH=386 $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-amd64:
GOOS=linux GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -314,21 +326,7 @@ vet:
go vet ./lib/...
go vet ./app/...
lint: install-golint
golint lib/...
golint app/...
install-golint:
which golint || go install golang.org/x/lint/golint@latest
errcheck: install-errcheck
errcheck -exclude=errcheck_excludes.txt ./lib/...
errcheck -exclude=errcheck_excludes.txt ./app/...
install-errcheck:
which errcheck || go install github.com/kisielk/errcheck@latest
check-all: fmt vet lint errcheck golangci-lint govulncheck
check-all: fmt vet golangci-lint govulncheck
test:
go test ./lib/... ./app/...
@@ -379,10 +377,10 @@ install-qtc:
golangci-lint: install-golangci-lint
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
golangci-lint run
install-golangci-lint:
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.48.0
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.50.1
govulncheck: install-govulncheck
govulncheck ./...

293
README.md
View File

@@ -40,19 +40,36 @@ VictoriaMetrics has the following prominent features:
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
* It features easy setup and operation:
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
without external dependencies.
* All the configuration is done via explicit command-line flags with reasonable defaults.
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) to S3 or GCS can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools. See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* It implements PromQL-based query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
* It implements PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
* It provides high performance and good vertical and horizontal scalability for both [data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f) when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
* It provides high performance and good vertical and horizontal scalability for both
[data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
* It provides high data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) may be crammed into limited storage comparing to TimescaleDB and [up to 7x less storage space is required compared to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae), [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* It provides high data compression, so up to 70x more data points may be stored into limited storage comparing to TimescaleDB
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex
according to [this benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to
[the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
* [Prometheus remote write API](#prometheus-setup).
@@ -65,11 +82,15 @@ VictoriaMetrics has the following prominent features:
* [Arbitrary CSV data](#how-to-import-csv-data).
* [Native binary format](#how-to-import-data-in-native-format).
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
* It supports metrics [relabeling](#relabeling).
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data
and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/) and [Google Filestore](https://cloud.google.com/filestore).
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/)
and [Google Filestore](https://cloud.google.com/filestore).
See also [various Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
@@ -84,7 +105,6 @@ Case studies:
* [Brandwatch](https://docs.victoriametrics.com/CaseStudies.html#brandwatch)
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
* [Dreamteam](https://docs.victoriametrics.com/CaseStudies.html#dreamteam)
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
* [Grammarly](https://docs.victoriametrics.com/CaseStudies.html#grammarly)
@@ -271,11 +291,13 @@ Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`.
The UI allows exploring query results via graphs and tables.
It also provides the following features:
- [metrics explorer](#metrics-explorer)
- [cardinality explorer](#cardinality-explorer)
- [query tracer](#query-tracing)
- [top queries explorer](#top-queries)
Graphs in vmui support scrolling and zooming:
Graphs in `vmui` support scrolling and zooming:
* Select the needed time range on the graph in order to zoom in into the selected time range. Hold `ctrl` (or `cmd` on MacOS) and scroll down in order to zoom out.
* Hold `ctrl` (or `cmd` on MacOS) and scroll up in order to zoom in the area under cursor.
@@ -293,6 +315,8 @@ VMUI allows investigating correlations between multiple queries on the same grap
enter an additional query in the newly appeared input field and press `Enter`.
Results for all the queries are displayed simultaneously on the same graph.
Graphs for a particular query can be temporarily hidden by clicking the `eye` icon on the right side of the input field.
When the `eye` icon is clicked while holding the `ctrl` key, then query results for the rest of queries become hidden
except of the current query results.
See the [example VMUI at VictoriaMetrics playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/?g0.expr=100%20*%20sum(rate(process_cpu_seconds_total))%20by%20(job)&g0.range_input=1d).
@@ -304,9 +328,21 @@ See the [example VMUI at VictoriaMetrics playground](https://play.victoriametric
* queries with the biggest average execution duration;
* queries that took the most summary time for execution.
## Metrics explorer
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
2. Click the `Explore metrics` tab.
3. Select the `job` you want to explore.
4. Optionally select the `instance` for the selected job to explore.
5. Select metrics you want to explore and compare.
It is possible to change the selected time range for the graphs in the top right corner.
## Cardinality explorer
VictoriaMetrics provides an ability to explore time series cardinality at `cardinality` tab in [vmui](#vmui) in the following ways:
VictoriaMetrics provides an ability to explore time series cardinality at `Explore cardinality` tab in [vmui](#vmui) in the following ways:
- To identify metric names with the highest number of series.
- To identify labels with the highest number of series.
@@ -361,7 +397,7 @@ DataDog agent allows configuring destinations for metrics sending via ENV variab
or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.
<p align="center">
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
</p>
To configure DataDog agent via ENV variable add the following prefix:
@@ -395,7 +431,7 @@ DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guid
sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.
<p align="center">
<img src="Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
</p>
Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
@@ -699,8 +735,7 @@ VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query
VictoriaMetrics accepts optional `extra_filters[]=series_selector` query arg, which can be used for enforcing arbitrary label filters for queries. For example,
`/api/v1/query_range?extra_filters[]={env=~"prod|staging",user="xyz"}&query=<query>` would automatically add `{env=~"prod|staging",user="xyz"}` label filters to the given `<query>`. This functionality can be used for limiting the scope of time series visible to the given tenant. It is expected that the `extra_filters[]` query args are automatically set by auth proxy sitting in front of VictoriaMetrics. See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
VictoriaMetrics accepts relative times in `time`, `start` and `end` query args additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt).
For example, the following query would return data for the last 30 minutes: `/api/v1/query_range?start=-30m&query=...`.
VictoriaMetrics accepts multiple formats for `time`, `start` and `end` query args - see [these docs](#timestamp-formats).
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
@@ -725,6 +760,18 @@ Additionally, VictoriaMetrics provides the following handlers:
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
### Timestamp formats
VictoriaMetrics accepts the following formats for `time`, `start` and `end` query args
in [query APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and
in [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series).
- Unix timestamps in seconds with optional milliseconds after the point. For example, `1562529662.678`.
- [RFC3339](https://www.ietf.org/rfc/rfc3339.txt). For example, '2022-03-29T01:02:03Z`.
- Partial RFC3339. Examples: `2022`, `2022-03`, `2022-03-29`, `2022-03-29T01`, `2022-03-29T01:02`.
- Relative duration comparing to the current time. For example, `1h5m` means `one hour and five minutes ago`.
## Graphite API usage
VictoriaMetrics supports data ingestion in Graphite protocol - see [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details.
@@ -785,7 +832,7 @@ to your needs or when testing bugfixes.
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make victoria-metrics` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics` binary and puts it into the `bin` folder.
@@ -801,7 +848,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make victoria-metrics-linux-arm` or `make victoria-metrics-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics-linux-arm` or `victoria-metrics-linux-arm64` binary respectively and puts it into the `bin` folder.
@@ -815,7 +862,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
`Pure Go` mode builds only Go code without [cgo](https://golang.org/cmd/cgo/) dependencies.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make victoria-metrics-pure` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
@@ -944,8 +991,9 @@ Each JSON line contains samples for a single time series. An example output:
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
```
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
See [allowed formats](#timestamp-formats) for these args.
For example:
```console
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
@@ -992,8 +1040,9 @@ where:
* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to export.
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
See [allowed formats](#timestamp-formats) for these args.
For example:
```console
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
@@ -1019,8 +1068,9 @@ wget -O- -q 'http://your_victoriametrics_instance:8428/api/v1/series/count' | jq
# relaunch victoriametrics with search.maxExportSeries more than value from previous command
```
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
See [allowed formats](#timestamp-formats) for these args.
For example:
```console
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
@@ -1035,7 +1085,9 @@ The [deduplication](#deduplication) isn't applied for the data exported in nativ
## How to import time series data
Time series data can be imported into VictoriaMetrics via any supported data ingestion protocol:
VictoriaMetrics can discover and scrape metrics from Prometheus-compatible targets (aka "pull" protocol) -
see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
Additionally, VictoriaMetrics can accept metrics via the following popular data ingestion protocols (aka "push" protocols):
* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). See [these docs](#prometheus-setup) for details.
* DataDog `submit metrics` API. See [these docs](#how-to-send-data-from-datadog-agent) for details.
@@ -1243,7 +1295,11 @@ Example contents for `-relabelConfig` file:
regex: true
```
VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling.
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
The relabeling can be debugged at `http://victoriametrics:8428/metric-relabel-debug` page.
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabel-debug) for more details.
## Federation
@@ -1252,7 +1308,8 @@ VictoriaMetrics exports [Prometheus-compatible federation data](https://promethe
at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
See [allowed formats](#timestamp-formats) for these args.
For example:
```console
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
@@ -1349,7 +1406,12 @@ with the enabled de-duplication. See [this section](#deduplication) for details.
## Deduplication
VictoriaMetrics leaves a single raw sample with the biggest timestamp per each `-dedup.minScrapeInterval` discrete interval if `-dedup.minScrapeInterval` is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would leave a single raw sample with the biggest timestamp per each discrete 60s interval. If multiple raw samples have the same biggest timestamp on the given `-dedup.minScrapeInterval` discrete interval, then an arbitrary sample out of these samples is left. This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness).
VictoriaMetrics leaves a single raw sample with the biggest timestamp per each `-dedup.minScrapeInterval` discrete interval
if `-dedup.minScrapeInterval` is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would leave a single
raw sample with the biggest timestamp per each discrete 60s interval.
This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness).
If multiple raw samples have the same biggest timestamp on the given `-dedup.minScrapeInterval` discrete interval, then the sample with the biggest value is left.
The `-dedup.minScrapeInterval=D` is equivalent to `-downsampling.period=0s:D` if [downsampling](#downsampling) is enabled. So it is safe to use deduplication and downsampling simultaneously.
@@ -1363,18 +1425,50 @@ It is recommended passing different `-promscrape.cluster.name` values to HA pair
## Storage
VictoriaMetrics stores time series data in [MergeTree](https://en.wikipedia.org/wiki/Log-structured_merge-tree)-like
data structures. On insert, VictoriaMetrics accumulates up to 1s of data and dumps it on disk to
`<-storageDataPath>/data/small/YYYY_MM/` subdirectory forming a `part` with the following
name pattern: `rowsCount_blocksCount_minTimestamp_maxTimestamp`. Each part consists of two "columns":
values and timestamps. These are sorted and compressed raw time series values. Additionally, part contains
index files for searching for specific series in the values and timestamps files.
VictoriaMetrics buffers the ingested data in memory for up to a second. Then the buffered data is written to in-memory `parts`,
which can be searched during queries. The in-memory `parts` are periodically persisted to disk, so they could survive unclean shutdown
such as out of memory crash, hardware power loss or `SIGKILL` signal. The interval for flushing the in-memory data to disk
can be configured with the `-inmemoryDataFlushInterval` command-line flag (note that too short flush interval may significantly increase disk IO).
`Parts` are periodically merged into the bigger parts. The resulting `part` is constructed
under `<-storageDataPath>/data/{small,big}/YYYY_MM/tmp` subdirectory.
When the resulting `part` is complete, it is atomically moved from the `tmp`
to its own subdirectory, while the source parts are atomically removed. The end result is that the source
parts are substituted by a single resulting bigger `part` in the `<-storageDataPath>/data/{small,big}/YYYY_MM/` directory.
In-memory parts are persisted to disk into `part` directories under the `<-storageDataPath>/data/small/YYYY_MM/` folder,
where `YYYY_MM` is the month partition for the stored data. For example, `2022_11` is the partition for `parts`
with [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) from `November 2022`.
The `part` directory has the following name pattern: `rowsCount_blocksCount_minTimestamp_maxTimestamp`, where:
- `rowsCount` - the number of [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) stored in the part
- `blocksCount` - the number of blocks stored in the part (see details about blocks below)
- `minTimestamp` and `maxTimestamp` - minimum and maximum timestamps across raw samples stored in the part
Each `part` consists of `blocks` sorted by internal time series id (aka `TSID`).
Each `block` contains up to 8K [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples),
which belong to a single [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series).
Raw samples in each block are sorted by `timestamp`. Blocks for the same time series are sorted
by the `timestamp` of the first sample. Timestamps and values for all the blocks
are stored in [compressed form](https://faun.pub/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
in separate files under `part` directory - `timestamps.bin` and `values.bin`.
The `part` directory also contains `index.bin` and `metaindex.bin` files - these files contain index
for fast block lookups, which belong to the given `TSID` and cover the given time range.
`Parts` are periodically merged into bigger parts in background. The background merge provides the following benefits:
* keeping the number of data files under control, so they don't exceed limits on open files
* improved data compression, since bigger parts are usually compressed better than smaller parts
* improved query speed, since queries over smaller number of parts are executed faster
* various background maintenance tasks such as [de-duplication](#deduplication), [downsampling](#downsampling)
and [freeing up disk space for the deleted time series](#how-to-delete-time-series) are performed during the merge
Newly added `parts` either successfully appear in the storage or fail to appear.
The newly added `parts` are being created in a temporary directory under `<-storageDataPath>/data/{small,big}/YYYY_MM/tmp` folder.
When the newly added `part` is fully written and [fsynced](https://man7.org/linux/man-pages/man2/fsync.2.html)
to a temporary directory, then it is atomically moved to the storage directory.
Thanks to this alogrithm, storage never contains partially created parts, even if hardware power off
occurrs in the middle of writing the `part` to disk - such incompletely written `parts`
are automatically deleted on the next VictoriaMetrics start.
The same applies to merge process — `parts` are either fully merged into a new `part` or fail to merge,
leaving the source `parts` untouched.
VictoriaMetrics doesn't merge parts if their summary size exceeds free disk space.
This prevents from potential out of disk space errors during merge.
@@ -1383,24 +1477,10 @@ This increases overhead during data querying, since VictoriaMetrics needs to rea
bigger number of parts per each request. That's why it is recommended to have at least 20%
of free disk space under directory pointed by `-storageDataPath` command-line flag.
Information about merging process is available in [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176) Grafana dashboards.
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/).
See more details in [monitoring docs](#monitoring).
The `merge` process improves compression rate and keeps number of `parts` on disk relatively low.
Benefits of doing the merge process are the following:
* it improves query performance, since lower number of `parts` are inspected with each query
* it reduces the number of data files, since each `part` contains fixed number of files
* various background maintenance tasks such as [de-duplication](#deduplication), [downsampling](#downsampling)
and [freeing up disk space for the deleted time series](#how-to-delete-time-series) are performed during the merge.
Newly added `parts` either appear in the storage or fail to appear.
Storage never contains partially created parts. The same applies to merge process — `parts` are either fully
merged into a new `part` or fail to merge. MergeTree doesn't contain partially merged `parts`.
`Part` contents in MergeTree never change. Parts are immutable. They may be only deleted after the merge
to a bigger `part` or when the `part` contents goes outside the configured `-retentionPeriod`.
See [this article](https://valyala.medium.com/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for more details.
See also [how to work with snapshots](#how-to-work-with-snapshots).
@@ -1536,7 +1616,9 @@ VictoriaMetrics provides the following security-related command-line flags:
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
See also [security recommendation for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#security)
and [the general security page at VictoriaMetrics website](https://victoriametrics.com/security/).
## Tuning
@@ -1565,8 +1647,8 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
with 10 seconds interval.
Official Grafana dashboards available for [single-node](https://grafana.com/dashboards/10229)
and [clustered](https://grafana.com/grafana/dashboards/11176) VictoriaMetrics.
Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
created by community.
@@ -1723,10 +1805,11 @@ and [cardinality explorer docs](#cardinality-explorer).
* VictoriaMetrics buffers incoming data in memory for up to a few seconds before flushing it to persistent storage.
This may lead to the following "issues":
* Data becomes available for querying in a few seconds after inserting. It is possible to flush in-memory buffers to persistent storage
* Data becomes available for querying in a few seconds after inserting. It is possible to flush in-memory buffers to searchable parts
by requesting `/internal/force_flush` http handler. This handler is mostly needed for testing and debugging purposes.
* The last few seconds of inserted data may be lost on unclean shutdown (i.e. OOM, `kill -9` or hardware reset).
See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
The `-inmemoryDataFlushInterval` command-line flag allows controlling the frequency of in-memory data flush to persistent storage.
See [storage docs](#storage) and [this article](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) for more details.
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) for the current amount of RAM.
@@ -1814,8 +1897,8 @@ The following metrics for each type of cache are exported at [`/metrics` page](#
* `vm_cache_misses_total` - the number of cache misses
* `vm_cache_entries` - the number of entries in the cache
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
contain `Caches` section with cache metrics visualized. The panels show the current
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
then cache efficiency is already very high and does not need any tuning.
@@ -2056,7 +2139,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-dedup.minScrapeInterval duration
@@ -2091,7 +2174,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
-graphiteListenAddr string
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
-graphiteListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-graphiteTrimTimestamp duration
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
-http.connTimeout duration
@@ -2111,20 +2196,24 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-httpAuth.username string
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
TCP address to listen for http connections (default ":8428")
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8428")
-httpListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-import.maxLineLen size
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
-influx.databaseNames array
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
Supports an array of values separated by comma or specified via multiple flags.
-influx.maxLineSize size
The maximum size in bytes for a single InfluxDB line during parsing
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
-influxDBLabel string
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
-influxListenAddr string
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<victoriametrics>:8428/write . See also -influxListenAddr.useProxyProtocol
-influxListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-influxMeasurementFieldSeparator string
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
-influxSkipMeasurement
@@ -2133,8 +2222,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field
-influxTrimTimestamp duration
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-inmemoryDataFlushInterval duration
The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s)
-insert.maxQueueDuration duration
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
-internStringMaxLen int
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
-logNewSeries
Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
-loggerDisableTimestamps
@@ -2143,6 +2236,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -2152,30 +2247,34 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentInserts int
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
-maxInsertRequestSize size
The maximum size in bytes of a single Prometheus remote_write API request
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
-maxLabelValueLen int
The maximum length of label values in the accepted time series. Longer label values are truncated. In this case the vm_too_long_label_values_total metric at /metrics page is incremented (default 16384)
-maxLabelsPerTimeseries int
The maximum number of labels accepted per time series. Superfluous labels are dropped. In this case the vm_metrics_with_dropped_labels_total metric at /metrics page is incremented (default 30)
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-opentsdbHTTPListenAddr string
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
-opentsdbHTTPListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-opentsdbListenAddr string
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
-opentsdbListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-opentsdbTrimTimestamp duration
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
-opentsdbhttp.maxInsertRequestSize size
The maximum size of OpenTSDB HTTP put request
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
-opentsdbhttpTrimTimestamp duration
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-pprofAuthKey string
@@ -2242,15 +2341,19 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
-promscrape.maxResponseHeadersSize size
The maximum size of http response headers from Prometheus scrape targets
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 4096)
-promscrape.maxScrapeSize size
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16777216)
-promscrape.minResponseSizeForStreamParse size
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
-promscrape.noStaleMarkers
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
-promscrape.nomad.waitTime duration
Wait time used by Nomad service discovery. Default value is used if not set
-promscrape.nomadSDCheckInterval duration
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
-promscrape.openstackSDCheckInterval duration
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
-promscrape.seriesLimitPerTarget int
@@ -2275,8 +2378,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Supports an array of values separated by comma or specified via multiple flags.
-relabelConfig string
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
-relabelDebug
Whether to log metrics before and after relabeling with -relabelConfig. If the -relabelDebug is enabled, then the metrics aren't sent to storage. This is useful for debugging the relabeling configs
-retentionFilter array
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
@@ -2296,7 +2397,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-search.graphiteStorageStep duration
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 10s)
-search.latencyOffset duration
The time when data points become visible in query results after the collection. Too small value can result in incomplete last points for query results (default 30s)
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
-search.logSlowQueryDuration duration
Log queries with execution time exceeding this value. Zero disables slow query logging (default 5s)
-search.maxConcurrentRequests int
@@ -2313,7 +2414,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaining due to historical reasons
-search.maxMemoryPerQuery size
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-search.maxPointsPerTimeseries int
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
-search.maxPointsSubqueryPerTimeseries int
@@ -2322,7 +2423,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum duration for query execution (default 30s)
-search.maxQueryLen size
The maximum search query length in bytes
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
-search.maxQueueDuration duration
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-search.maxSamplesPerQuery int
@@ -2378,25 +2479,31 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-storage.cacheSizeIndexDBIndexBlocks size
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-storage.cacheSizeIndexDBTagFilters size
Overrides max size for indexdb/tagFiltersToMetricIDs cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-storage.cacheSizeStorageTSID size
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-storage.maxDailySeries int
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxHourlySeries
-storage.maxHourlySeries int
The maximum number of unique series can be added to the storage during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxDailySeries
-storage.minFreeDiskSpaceBytes size
The minimum free disk space at -storageDataPath after which the storage stops accepting new data
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 10000000)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)
-storageDataPath string
Path to storage data (default "victoria-metrics-data")
-streamAggr.config string
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -streamAggr.dedupInterval
-streamAggr.dedupInterval duration
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
-streamAggr.keepInput
Whether to keep input samples after the aggregation with -streamAggr.config. By default the input is dropped after the aggregation, so only the aggregate data is stored. See https://docs.victoriametrics.com/stream-aggregation.html
-tls
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
-tlsCertFile string
@@ -2414,4 +2521,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Show VictoriaMetrics version
-vmalert.proxyURL string
Optional URL for proxying requests to vmalert. For example, if -vmalert.proxyURL=http://vmalert:8880 , then alerting API requests such as /api/v1/rules from Grafana will be proxied to http://vmalert:8880/api/v1/rules
-vmui.customDashboardsPath string
Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
```

View File

@@ -24,11 +24,17 @@ import (
)
var (
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling")
dryRun = flag.Bool("dryRun", false, "Whether to check only -promscrape.config and then exit. "+
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag")
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
"The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+
"Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
)
func main() {
@@ -54,12 +60,13 @@ func main() {
logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
startTime := time.Now()
storage.SetDedupInterval(*minScrapeInterval)
storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
vmselect.Init()
vminsert.Init()
startSelfScraper()
go httpserver.Serve(*httpListenAddr, requestHandler)
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())
sig := procutil.WaitForSigterm()
@@ -96,6 +103,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"vmui", "Web UI"},
{"targets", "status for discovered active targets"},
{"service-discovery", "labels before and after relabeling for discovered targets"},
{"metric-relabel-debug", "debug metric relabeling"},
{"expand-with-exprs", "WITH expressions' tutorial"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"metrics", "available service metrics"},

View File

@@ -129,10 +129,10 @@ func setUp() {
storagePath = filepath.Join(os.TempDir(), testStorageSuffix)
processFlags()
logger.Init()
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
vmselect.Init()
vminsert.Init()
go httpserver.Serve(*httpListenAddr, requestHandler)
go httpserver.Serve(*httpListenAddr, false, requestHandler)
readyStorageCheckFunc := func() bool {
resp, err := http.Get(testHealthHTTPPath)
if err != nil {
@@ -189,10 +189,8 @@ func tearDown() {
func TestWriteRead(t *testing.T) {
t.Run("write", testWrite)
vmstorage.Storage.DebugFlush()
time.Sleep(1 * time.Second)
vmstorage.Stop()
// open storage after stop in write
vmstorage.InitWithoutMetrics(promql.ResetRollupResultCacheIfNeeded)
t.Run("read", testRead)
}
@@ -261,7 +259,7 @@ func testRead(t *testing.T) {
for _, q := range test.Query {
q = testutil.PopulateTimeTplString(q, insertionTime)
if test.Issue != "" {
test.Issue = "Regression in " + test.Issue
test.Issue = "\nRegression in " + test.Issue
}
switch true {
case strings.HasPrefix(q, "/api/v1/export"):
@@ -284,7 +282,7 @@ func testRead(t *testing.T) {
queryResult := Query{}
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
t.Fatalf("Query. %s fails with error %s.%s", q, err, test.Issue)
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
}
default:
t.Fatalf("unsupported read query %s", q)

View File

@@ -12,7 +12,7 @@ func TestPopulateTimeTplString(t *testing.T) {
}
f := func(s, resultExpected string) {
t.Helper()
result := PopulateTimeTplString(s, now)
result := PopulateTimeTplString(s, now.UTC())
if result != resultExpected {
t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
}

View File

@@ -6,7 +6,7 @@
"forms_daily_count;item=x 2 {TIME_S-2m}",
"forms_daily_count;item=y 3 {TIME_S-1m}",
"forms_daily_count;item=y 4 {TIME_S-2m}"],
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}"],
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
"result_query": {
"status":"success",
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","2"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","4"]}]}

View File

@@ -24,8 +24,8 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
* Can accept data via all the ingestion protocols supported by VictoriaMetrics - see [these docs](#how-to-push-data-to-vmagent).
* Can replicate collected metrics simultaneously to multiple remote storage systems -
see [these docs](#replication-and-high-availability).
* Can aggregate incoming samples by time and by labels before sending them to remote storage - see [these docs](https://docs.victoriametrics.com/stream-aggregation.html).
* Can replicate collected metrics simultaneously to multiple remote storage systems - see [these docs](#replication-and-high-availability).
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
@@ -60,6 +60,8 @@ Example command line:
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
```
Example of scrape configuration for `-promscrape.config` argument you can find [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/prometheus.yml)
See [how to scrape Prometheus-compatible targets](#how-to-collect-metrics-in-prometheus-format) for more details.
If you use single-node VictoriaMetrics, then you can discover and scrape Prometheus-compatible targets directly from VictoriaMetrics
@@ -126,6 +128,12 @@ If you use Prometheus only for scraping metrics from various targets and forward
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
### Statsd alternative
`vmagent` can be used as an alternative to [statsd](https://github.com/statsd/statsd)
when [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) is enabled.
See [these docs](https://docs.victoriametrics.com/stream-aggregation.html#statsd-alternative) for details.
### Flexible metrics relay
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](#relabeling)
@@ -245,8 +253,6 @@ scrape_configs:
* `scrape_align_interval: duration` for aligning scrapes to the given interval instead of using random offset
in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps spreading scrapes evenly in time.
* `scrape_offset: duration` for specifying the exact offset for scraping instead of using random offset in the range `[0 ... scrape_interval]`.
* `relabel_debug: true` for enabling debug logging during relabeling of the discovered targets. See [these docs](#relabeling).
* `metric_relabel_debug: true` for enabling debug logging during relabeling of the scraped metrics. See [these docs](#relabeling).
See [scrape_configs docs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) for more details on all the supported options.
@@ -317,7 +323,8 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
## Automatically generated metrics
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format):
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format)
and attaches target-specific `instance` and `job` labels to these metrics:
* `up` - this metric exposes `1` value on successful scrape and `0` value on unsuccessful scrape. This allows monitoring
failing scrapes with the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html):
@@ -405,6 +412,9 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0
```
If the target exports metrics with names clashing with the automatically generated metric names, then `vmagent` automatically
adds `exported_` prefix to these metric names, so they don't clash with automatically generated metric names.
## Relabeling
@@ -415,26 +425,25 @@ with [additional enhancements](#relabeling-enhancements). The relabeling can be
This relabeling is used for modifying labels in discovered targets and for dropping unneded targets.
See [relabeling cookbook](https://docs.victoriametrics.com/relabeling.html) for details.
This relabeling can be debugged by passing `relabel_debug: true` option to the corresponding `scrape_config` section.
In this case `vmagent` logs target labels before and after the relabeling and then drops the logged target.
This relabeling can be debugged by clicking the `debug` link at the corresponding target on the `http://vmagent:8429/targets` page
or on the `http://vmagent:8429/service-discovery` page. See [these docs](#relabel-debug) for details.
* At the `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file.
This relabeling is used for modifying labels in scraped metrics and for dropping unneeded metrics.
See [relabeling cookbook](https://docs.victoriametrics.com/relabeling.html) for details.
This relabeling can be debugged by passing `metric_relabel_debug: true` option to the corresponding `scrape_config` section.
In this case `vmagent` logs metrics before and after the relabeling and then drops the logged metrics.
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
* At the `-remoteWrite.relabelConfig` file. This relabeling is used for modifying labels for all the collected metrics
(inluding [metrics obtained via push-based protocols](#how-to-push-data-to-vmagent)) and for dropping unneeded metrics
(including [metrics obtained via push-based protocols](#how-to-push-data-to-vmagent)) and for dropping unneeded metrics
before sending them to all the configured `-remoteWrite.url` addresses.
This relabeling can be debugged by passing `-remoteWrite.relabelDebug` command-line option to `vmagent`.
In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to remote storage.
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
* At the `-remoteWrite.urlRelabelConfig` files. This relabeling is used for modifying labels for metrics
and for dropping unneeded metrics before sending them to a particular `-remoteWrite.url`.
This relabeling can be debugged by passing `-remoteWrite.urlRelabelDebug` command-line options to `vmagent`.
In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to the corresponding `-remoteWrite.url`.
This relabeling can be debugged via `http://vmagent:8429/metric-relabel-debug` page. See [these docs](#relabel-debug) for details.
All the files with relabeling configs can contain special placeholders in the form `%{ENV_VAR}`,
which are replaced by the corresponding environment variable values.
@@ -449,9 +458,6 @@ The following articles contain useful information about Prometheus relabeling:
* [Extracting labels from legacy metric names](https://www.robustperception.io/extracting-labels-from-legacy-metric-names)
* [relabel_configs vs metric_relabel_configs](https://www.robustperception.io/relabel_configs-vs-metric_relabel_configs)
[This relabeler playground](https://relabeler.promlabs.com/) can help debugging issues related to relabeling.
## Relabeling enhancements
`vmagent` provides the following enhancements on top of Prometheus-compatible relabeling:
@@ -570,7 +576,7 @@ Note that the `name` field must be substituted with explicit `__name__` option u
If `__name__` option is missing under `labels` section, then the original Graphite-style metric name is left unchanged.
For example, the following relabeling rule generates `requests_total{job="app42",instance="host124:8080"}` metric
from "app42.host123.requests.total" Graphite-style metric:
from `app42.host123.requests.total` Graphite-style metric:
```yaml
- action: graphite
@@ -593,6 +599,26 @@ Important notes about `action: graphite` relabeling rules:
The `action: graphite` relabeling rules are easier to write and maintain than `action: replace` for labels extraction from Graphite-style metric names.
Additionally, the `action: graphite` relabeling rules usually work much faster than the equivalent `action: replace` rules.
## Relabel debug
`vmagent` and [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
provide the following tools for debugging target-level and metric-level relabeling:
- Target-level debugging (e.g. `relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs))
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
and clicking the `debug target relabeling` link at the target, which must be debugged.
The opened page will show step-by-step results for the actual target relabeling rules applied to the discovered target labels.
The `http://vmagent:8429/targets` page shows only active targets. If you need to understand why some target
is dropped during the relabeling, then navigate to `http://vmagent:8428/service-discovery` page
(`http://victoriametrics:8428/service-discovery` for single-node VictoriaMetrics), find the dropped target
and click the `debug` link there. The opened page will show step-by-step results for the actual relabeling rules,
which result to target drop.
- Metric-level debugging (e.g. `metric_relabel_configs` section at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
can be performed by navigating to `http://vmagent:8429/targets` page (`http://victoriametrics:8428/targets` page for single-node VictoriaMetrics)
and clicking the `debug metrics relabeling` link at the target, which must be debugged.
The opened page will show step-by-step results for the actual metric relabeling rules applied to the given target labels.
## Prometheus staleness markers
@@ -650,8 +676,9 @@ scrape_configs:
'match[]': ['{__name__!=""}']
```
Note that `sample_limit` and `series_limit` [scrape_config options](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
cannot be used in stream parsing mode because the parsed data is pushed to remote storage as soon as it is parsed.
Note that `vmagent` in stream parsing mode stores up to `sample_limit` samples to the configured `-remoteStorage.url`
instead of droping all the samples read from the target, because the parsed data is sent to the remote storage
as soon as it is parsed in stream parsing mode.
## Scraping big number of targets
@@ -736,21 +763,19 @@ scrape_configs:
## Cardinality limiter
By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced in the following places:
By default `vmagent` doesn't limit the number of time series each scrape target can expose.
The limit can be enforced in the following places:
* Via `-promscrape.seriesLimitPerTarget` command-line option. This limit is applied individually
to all the scrape targets defined in the file pointed by `-promscrape.config`.
* Via `series_limit` config option at `scrape_config` section. This limit is applied individually
to all the scrape targets defined in the given `scrape_config`.
* Via `series_limit` config option at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) section.
This limit is applied individually to all the scrape targets defined in the given `scrape_config`.
* Via `__series_limit__` label, which can be set with [relabeling](#relabeling) at `relabel_configs` section.
This limit is applied to the corresponding scrape targets. Typical use case: to set the limit
via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets,
which may expose too high number of time series.
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
Scraped metrics are dropped for time series exceeding the given limit.
Scraped metrics are dropped for time series exceeding the given limit on the time window of 24h.
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
@@ -764,6 +789,7 @@ These metrics allow building the following alerting rules:
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`.
The limit can be enforced by setting the following command-line flags:
@@ -795,7 +821,7 @@ See also [cardinality explorer docs](https://docs.victoriametrics.com/#cardinali
We recommend setting up regular scraping of this page either through `vmagent` itself or by Prometheus
so that the exported metrics may be analyzed later.
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview.
Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683-victoriametrics-vmagent/) for `vmagent` state overview.
Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
@@ -1027,7 +1053,7 @@ It may be needed to build `vmagent` from source code when developing or testing
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmagent` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds the `vmagent` binary and puts it into the `bin` folder.
@@ -1056,7 +1082,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmagent-linux-arm` or `make vmagent-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics)
It builds `vmagent-linux-arm` or `vmagent-linux-arm64` binary respectively and puts it into the `bin` folder.
@@ -1115,7 +1141,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-denyQueryTracing
@@ -1135,7 +1161,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
-graphiteListenAddr string
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty
TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol
-graphiteListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -graphiteListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-graphiteTrimTimestamp duration
Trim timestamps for Graphite data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
-http.connTimeout duration
@@ -1155,20 +1183,24 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-httpAuth.username string
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr='' (default ":8429")
TCP address to listen for http connections. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol (default ":8429")
-httpListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-import.maxLineLen size
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
-influx.databaseNames array
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
Supports an array of values separated by comma or specified via multiple flags.
-influx.maxLineSize size
The maximum size in bytes for a single InfluxDB line during parsing
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
-influxDBLabel string
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
-influxListenAddr string
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write
TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . See also -influxListenAddr.useProxyProtocol
-influxListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -influxListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-influxMeasurementFieldSeparator string
Separator for '{measurement}{separator}{field_name}' metric name when inserted via InfluxDB line protocol (default "_")
-influxSkipMeasurement
@@ -1178,7 +1210,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-influxTrimTimestamp duration
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-insert.maxQueueDuration duration
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
-internStringMaxLen int
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
-kafka.consumer.topic array
Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
@@ -1211,6 +1245,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -1220,26 +1256,30 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentInserts int
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
-maxInsertRequestSize size
The maximum size in bytes of a single Prometheus remote_write API request
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-opentsdbHTTPListenAddr string
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty
TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
-opentsdbHTTPListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-opentsdbListenAddr string
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty
TCP and UDP address to listen for OpentTSDB metrics. Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol
-opentsdbListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-opentsdbTrimTimestamp duration
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
-opentsdbhttp.maxInsertRequestSize size
The maximum size of OpenTSDB HTTP put request
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
-opentsdbhttpTrimTimestamp duration
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
-pprofAuthKey string
@@ -1304,15 +1344,19 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
-promscrape.maxResponseHeadersSize size
The maximum size of http response headers from Prometheus scrape targets
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 4096)
-promscrape.maxScrapeSize size
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16777216)
-promscrape.minResponseSizeForStreamParse size
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
-promscrape.noStaleMarkers
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
-promscrape.nomad.waitTime duration
Wait time used by Nomad service discovery. Default value is used if not set
-promscrape.nomadSDCheckInterval duration
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
-promscrape.openstackSDCheckInterval duration
Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#openstack_sd_configs for details (default 30s)
-promscrape.seriesLimitPerTarget int
@@ -1384,12 +1428,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.maxBlockSize size
The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage. See also -remoteWrite.maxRowsPerBlock
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 8388608)
-remoteWrite.maxDailySeries int
The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
-remoteWrite.maxDiskUsagePerURL array
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500MB. Disk usage is unlimited if the value is set to 0
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB.
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB.
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.maxHourlySeries int
The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
@@ -1422,9 +1466,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.relabelConfig string
Optional path to file with relabel_config entries. The path can point either to local file or to http url. These entries are applied to all the metrics before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details
-remoteWrite.relabelDebug
Whether to log metrics before and after relabeling with -remoteWrite.relabelConfig. If the -remoteWrite.relabelDebug is enabled, then the metrics aren't sent to remote storage. This is useful for debugging the relabeling configs
Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
-remoteWrite.roundDigits array
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
Supports array of values separated by comma or specified via multiple flags.
@@ -1436,6 +1478,15 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-remoteWrite.significantFigures array
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.config array
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.dedupInterval array
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.keepInput array
Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. See https://docs.victoriametrics.com/stream-aggregation.html
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.tlsCAFile array
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default system CA is used
Supports an array of values separated by comma or specified via multiple flags.
@@ -1457,11 +1508,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Remote storage URL to write data to. It must support Prometheus remote_write API. It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.urlRelabelConfig array
Optional path to relabel config for the corresponding -remoteWrite.url. The path can point either to local file or to http url
Optional path to relabel configs for the corresponding -remoteWrite.url. See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.urlRelabelDebug array
Whether to log metrics before and after relabeling with -remoteWrite.urlRelabelConfig. If the -remoteWrite.urlRelabelDebug is enabled, then the metrics aren't sent to the corresponding -remoteWrite.url. This is useful for debugging the relabeling configs
Supports array of values separated by comma or specified via multiple flags.
-sortLabels
Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}Enabled sorting for labels can slow down ingestion performance a bit
-tls

View File

@@ -10,7 +10,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -26,10 +25,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
}

View File

@@ -10,7 +10,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -28,11 +27,9 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
ce := req.Header.Get("Content-Encoding")
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
return insertRows(at, series, extraLabels)
})
ce := req.Header.Get("Content-Encoding")
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
return insertRows(at, series, extraLabels)
})
}
@@ -52,10 +49,18 @@ func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmars
Name: "__name__",
Value: ss.Metric,
})
labels = append(labels, prompbmarshal.Label{
Name: "host",
Value: ss.Host,
})
if ss.Host != "" {
labels = append(labels, prompbmarshal.Label{
Name: "host",
Value: ss.Host,
})
}
if ss.Device != "" {
labels = append(labels, prompbmarshal.Label{
Name: "device",
Value: ss.Device,
})
}
for _, tag := range ss.Tags {
name, value := parser.SplitTag(tag)
if name == "host" {

View File

@@ -7,7 +7,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,9 +19,7 @@ var (
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
return parser.ParseStream(r, insertRows)
}
func insertRows(rows []parser.Row) error {

View File

@@ -16,7 +16,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -37,10 +36,8 @@ var (
//
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
return insertRows(nil, db, rows, nil)
})
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
return insertRows(nil, db, rows, nil)
})
}
@@ -52,15 +49,13 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
return insertRows(at, db, rows, extraLabels)
})
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
q := req.URL.Query()
precision := q.Get("precision")
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
db := q.Get("db")
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
return insertRows(at, db, rows, extraLabels)
})
}

View File

@@ -38,23 +38,35 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
var (
httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''")
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
influxListenAddr = flag.String("influxListenAddr", "", "TCP and UDP address to listen for InfluxDB line protocol data. Usually :8089 must be set. Doesn't work if empty. "+
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
"This flag isn't needed when ingesting data over HTTP - just send it to http://<vmagent>:8429/write . "+
"See also -influxListenAddr.useProxyProtocol")
influxUseProxyProtocol = flag.Bool("influxListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -influxListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. "+
"See also -graphiteListenAddr.useProxyProtocol")
graphiteUseProxyProtocol = flag.Bool("graphiteListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -graphiteListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
"Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
"Usually :4242 must be set. Doesn't work if empty. See also -opentsdbListenAddr.useProxyProtocol")
opentsdbUseProxyProtocol = flag.Bool("opentsdbListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -opentsdbListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. "+
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flag.String("configAuthKey", "", "Authorization key for accessing /config page. It must be passed via authKey query arg")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . "+
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed by passing -promscrape.config.strictParse=false command-line flag")
)
@@ -104,28 +116,27 @@ func main() {
startTime := time.Now()
remotewrite.Init()
common.StartUnmarshalWorkers()
writeconcurrencylimiter.Init()
if len(*influxListenAddr) > 0 {
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
return influx.InsertHandlerForReader(r, false)
})
}
if len(*graphiteListenAddr) > 0 {
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, graphite.InsertHandler)
graphiteServer = graphiteserver.MustStart(*graphiteListenAddr, *graphiteUseProxyProtocol, graphite.InsertHandler)
}
if len(*opentsdbListenAddr) > 0 {
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, opentsdb.InsertHandler, httpInsertHandler)
opentsdbServer = opentsdbserver.MustStart(*opentsdbListenAddr, *opentsdbUseProxyProtocol, opentsdb.InsertHandler, httpInsertHandler)
}
if len(*opentsdbHTTPListenAddr) > 0 {
httpInsertHandler := getOpenTSDBHTTPInsertHandler()
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, httpInsertHandler)
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
}
promscrape.Init(remotewrite.Push)
if len(*httpListenAddr) > 0 {
go httpserver.Serve(*httpListenAddr, requestHandler)
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
}
logger.Infof("started vmagent in %.3f seconds", time.Since(startTime).Seconds())
@@ -207,6 +218,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.WriteAPIHelp(w, [][2]string{
{"targets", "status for discovered active targets"},
{"service-discovery", "labels before and after relabeling for discovered targets"},
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"metrics", "available service metrics"},
@@ -224,7 +236,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
statusCode := http.StatusNoContent
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus/metrics/job/") ||
strings.HasPrefix(path, "/api/v1/import/prometheus/metrics/job/") {
// Return 200 status code for pushgateway requests.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
statusCode = http.StatusOK
}
w.WriteHeader(statusCode)
return true
}
if strings.HasPrefix(path, "datadog/") {
@@ -325,6 +344,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrapeServiceDiscoveryRequests.Inc()
promscrape.WriteServiceDiscovery(w, r)
return true
case "/prometheus/metric-relabel-debug", "/metric-relabel-debug":
promscrapeMetricRelabelDebugRequests.Inc()
promscrape.WriteMetricRelabelDebug(w, r)
return true
case "/prometheus/target-relabel-debug", "/target-relabel-debug":
promscrapeTargetRelabelDebugRequests.Inc()
promscrape.WriteTargetRelabelDebug(w, r)
return true
case "/prometheus/api/v1/targets", "/api/v1/targets":
promscrapeAPIV1TargetsRequests.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -340,12 +367,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
return true
case "/prometheus/config", "/config":
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
return true
}
promscrapeConfigRequests.Inc()
@@ -354,12 +376,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
return true
}
promscrapeStatusConfigRequests.Inc()
@@ -546,7 +563,11 @@ var (
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
promscrapeMetricRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/metric-relabel-debug"}`)
promscrapeTargetRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target-relabel-debug"}`)
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
promscrapeTargetResponseRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target_response"}`)
promscrapeTargetResponseErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/target_response"}`)

View File

@@ -12,7 +12,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -31,10 +30,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
isGzip := req.Header.Get("Content-Encoding") == "gzip"
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
return insertRows(at, block, extraLabels)
})
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
return insertRows(at, block, extraLabels)
})
}

View File

@@ -7,7 +7,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,9 +19,7 @@ var (
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
func InsertHandler(r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, insertRows)
})
return parser.ParseStream(r, insertRows)
}
func insertRows(rows []parser.Row) error {

View File

@@ -9,7 +9,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -25,10 +24,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
return parser.ParseStream(req, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
}

View File

@@ -1,17 +1,16 @@
package prometheusimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -31,20 +30,11 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
}, nil)
})
}
// InsertHandlerForReader processes metrics from given reader with optional gzip format
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
}, nil)
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)
})
}

View File

@@ -0,0 +1,60 @@
package prometheusimport
import (
"bytes"
"flag"
"log"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
)
var (
srv *httptest.Server
testOutput *bytes.Buffer
)
func TestInsertHandler(t *testing.T) {
setUp()
defer tearDown()
req := httptest.NewRequest("POST", "/insert/0/api/v1/import/prometheus", bytes.NewBufferString(`{"foo":"bar"}
go_memstats_alloc_bytes_total 1`))
if err := InsertHandler(nil, req); err != nil {
t.Errorf("unxepected error %s", err)
}
expectedMsg := "cannot unmarshal Prometheus line"
if !strings.Contains(testOutput.String(), expectedMsg) {
t.Errorf("output %q should contain %q", testOutput.String(), expectedMsg)
}
}
func setUp() {
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(204)
}))
flag.Parse()
remoteWriteFlag := "remoteWrite.url"
if err := flag.Lookup(remoteWriteFlag).Value.Set(srv.URL); err != nil {
log.Fatalf("unable to set %q with value %q, err: %v", remoteWriteFlag, srv.URL, err)
}
logger.Init()
common.StartUnmarshalWorkers()
remotewrite.Init()
testOutput = &bytes.Buffer{}
logger.SetOutputForTests(testOutput)
}
func tearDown() {
common.StopUnmarshalWorkers()
srv.Close()
logger.ResetOutputForTest()
tmpDataDir := flag.Lookup("remoteWrite.tmpDataPath").Value.String()
fs.MustRemoveAll(tmpDataDir)
}

View File

@@ -1,7 +1,6 @@
package promremotewrite
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -13,7 +12,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -29,19 +27,8 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, extraLabels)
})
})
}
// InsertHandlerForReader processes metrics from given reader
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, nil)
})
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
return insertRows(at, tss, extraLabels)
})
}

View File

@@ -195,7 +195,7 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
}
bb := writeRequestBufPool.Get()
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
if len(bb.B) <= maxUnpackedBlockSize.N {
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
zb := snappyBufPool.Get()
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
writeRequestBufPool.Put(bb)

View File

@@ -15,16 +15,13 @@ import (
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. "+
"The path can point either to local file or to http url. These entries are applied to all the metrics "+
"before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details")
relabelDebugGlobal = flag.Bool("remoteWrite.relabelDebug", false, "Whether to log metrics before and after relabeling with -remoteWrite.relabelConfig. "+
"If the -remoteWrite.relabelDebug is enabled, then the metrics aren't sent to remote storage. This is useful for debugging the relabeling configs")
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url. "+
"The path can point either to local file or to http url")
relabelDebug = flagutil.NewArrayBool("remoteWrite.urlRelabelDebug", "Whether to log metrics before and after relabeling with -remoteWrite.urlRelabelConfig. "+
"If the -remoteWrite.urlRelabelDebug is enabled, then the metrics aren't sent to the corresponding -remoteWrite.url. "+
"This is useful for debugging the relabeling configs")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
@@ -42,7 +39,7 @@ func CheckRelabelConfigs() error {
func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs
if *relabelConfigPathGlobal != "" {
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal, *relabelDebugGlobal)
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
}
@@ -58,7 +55,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
// Skip empty relabel config.
continue
}
prc, err := promrelabel.LoadRelabelConfigs(path, relabelDebug.GetOptionalArg(i))
prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
}
@@ -91,7 +88,7 @@ func initLabelsGlobal() {
}
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
if len(extraLabels) == 0 && pcs.Len() == 0 {
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
// Nothing to change.
return tss
}

View File

@@ -0,0 +1,49 @@
package remotewrite
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
func TestApplyRelabeling(t *testing.T) {
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
rctx := &relabelCtx{}
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
if !reflect.DeepEqual(gotTss, expTss) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
}
}
f(nil, nil, "up", "up")
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
- target_label: "foo"
replacement: "aaa"
- action: labeldrop
regex: "env.*"
`))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
oldVal := *usePromCompatibleNaming
*usePromCompatibleNaming = true
f(nil, nil, `foo.bar`, `foo_bar`)
*usePromCompatibleNaming = oldVal
}
func parseSeries(data string) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
tss = append(tss, prompbmarshal.TimeSeries{
Labels: promutils.MustNewLabelsFromString(data).GetLabels(),
})
return tss
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
@@ -58,6 +59,15 @@ var (
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/stream-aggregation.html . "+
"See also -remoteWrite.streamAggr.keepInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. "+
"By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. "+
"See https://docs.victoriametrics.com/stream-aggregation.html")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", "Input samples are de-duplicated with this interval before being aggregated. "+
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
)
var (
@@ -140,6 +150,7 @@ func Init() {
logger.Fatalf("cannot load relabel configs: %s", err)
}
allRelabelConfigs.Store(rcs)
configSuccess.Set(1)
configTimestamp.Set(fasttime.UnixTimestamp())
@@ -251,7 +262,7 @@ func Stop() {
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
//
// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
// If at isn't nil, the the data is pushed to the configured `-remoteWrite.multitenantURL`.
// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
//
// Note that wr may be modified by Push due to relabeling and rounding.
func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
@@ -435,9 +446,13 @@ var (
)
type remoteWriteCtx struct {
idx int
fq *persistentqueue.FastQueue
c *client
idx int
fq *persistentqueue.FastQueue
c *client
sas *streamaggr.Aggregators
streamAggrKeepInput bool
pss []*pendingSeries
pssNextIdx uint64
@@ -469,6 +484,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
}
c.init(argIdx, *queues, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
pssLen := *queues
@@ -481,7 +497,8 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
for i := range pss {
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
}
return &remoteWriteCtx{
rwctx := &remoteWriteCtx{
idx: argIdx,
fq: fq,
c: c,
@@ -490,6 +507,20 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
}
// Initialize sas
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
if sasFile != "" {
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(argIdx, 0)
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
if err != nil {
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggrFile=%q: %s", sasFile, err)
}
rwctx.sas = sas
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(argIdx)
}
return rwctx
}
func (rwctx *remoteWriteCtx) MustStop() {
@@ -501,6 +532,8 @@ func (rwctx *remoteWriteCtx) MustStop() {
rwctx.fq.UnblockAllReaders()
rwctx.c.MustStop()
rwctx.c = nil
rwctx.sas.MustStop()
rwctx.sas = nil
rwctx.fq.MustClose()
rwctx.fq = nil
@@ -509,6 +542,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
}
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
// Apply relabeling
var rctx *relabelCtx
var v *[]prompbmarshal.TimeSeries
rcs := allRelabelConfigs.Load().(*relabelConfigs)
@@ -526,11 +560,17 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
rowsCountAfterRelabel := getRowsCount(tss)
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
}
pss := rwctx.pss
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
rowsCount := getRowsCount(tss)
rwctx.rowsPushedAfterRelabel.Add(rowsCount)
pss[idx].Push(tss)
// Apply stream aggregation if any
rwctx.sas.Push(tss)
if rwctx.sas == nil || rwctx.streamAggrKeepInput {
// Push samples to the remote storage
rwctx.pushInternal(tss)
}
// Return back relabeling contexts to the pool
if rctx != nil {
*v = prompbmarshal.ResetTimeSeries(tss)
tssRelabelPool.Put(v)
@@ -538,6 +578,12 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
}
}
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
pss := rwctx.pss
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
pss[idx].Push(tss)
}
var tssRelabelPool = &sync.Pool{
New: func() interface{} {
a := []prompbmarshal.TimeSeries{}

View File

@@ -1,7 +1,6 @@
package vmimport
import (
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -13,7 +12,6 @@ import (
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
@@ -31,20 +29,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
if err != nil {
return err
}
return writeconcurrencylimiter.Do(func() error {
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
})
}
// InsertHandlerForReader processes metrics from given reader
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
return writeconcurrencylimiter.Do(func() error {
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
return insertRows(nil, rows, nil)
})
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
})
}

View File

@@ -9,6 +9,13 @@ protocol and require `-remoteWrite.url` to be configured.
Vmalert is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
implementation and aims to be compatible with its syntax.
A [single-node](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmalert)
or [cluster version](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmalert)
of VictoriaMetrics are capable of proxying requests to vmalert via `-vmalert.proxyURL` command-line flag.
Use this feature for the following cases:
* for proxying requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
* for accessing vmalert's UI through VictoriaMetrics Web interface.
## Features
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
@@ -69,16 +76,17 @@ Then configure `vmalert` accordingly:
-external.label=replica=a # Multiple external labels may be set
```
Note there's a separate `remoteWrite.url` to allow writing results of
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
alerting/recording rules into a different storage than the initial data that's
queried. This allows using `vmalert` to aggregate data from a short-term,
high-frequency, high-cardinality storage into a long-term storage with
decreased cardinality and a bigger interval between samples.
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
See the full list of configuration flags in [configuration](#configuration) section.
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
@@ -191,6 +199,11 @@ expr: <string>
# Is applicable to alerting rules only.
[ debug: <bool> | default = false ]
# Defines the number of rule's updates entries stored in memory
# and available for view on rule's Details page.
# Overrides `rule.updateEntriesLimit` value for this specific rule.
[ update_entries_limit: <integer> | default 0 ]
# Labels to add or overwrite for each alert.
labels:
[ <labelname>: <tmpl_string> ]
@@ -213,7 +226,8 @@ The following variables are available in templating:
| $labels or .Labels | The list of labels of the current alert. Use as ".Labels.<label_name>". | {% raw %}Too high number of connections for {{ .Labels.instance }}{% endraw %} |
| $alertID or .AlertID | The current alert's ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
| $groupID or .GroupID | The current alert's group ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr&#124;queryEscape }}{% endraw %} |
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr&#124;queryEscape }}{% endraw %} |
| $for or .For | Alert's configured for param. | {% raw %}Number of connections is too high for more than {{ .For }}{% endraw %} |
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | {% raw %}Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }}){% endraw %} |
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | {% raw %}Visit {{ $externalURL }} for more details{% endraw %} |
@@ -318,6 +332,12 @@ expr: <string>
# Labels to add or overwrite before storing the result.
labels:
[ <labelname>: <labelvalue> ]
# Defines the number of rule's updates entries stored in memory
# and available for view on rule's Details page.
# Overrides `rule.updateEntriesLimit` value for this specific rule.
[ update_entries_limit: <integer> | default 0 ]
```
For recording rules to work `-remoteWrite.url` must be specified.
@@ -468,7 +488,8 @@ Alertmanager will automatically deduplicate alerts with identical labels, so ens
all `vmalert`s are having the same config.
Don't forget to configure [cluster mode](https://prometheus.io/docs/alerting/latest/alertmanager/)
for Alertmanagers for better reliability.
for Alertmanagers for better reliability. List all Alertmanager URLs in vmalert's `-notifier.url`
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
This example uses single-node VM server for the sake of simplicity.
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
@@ -501,8 +522,8 @@ groups:
expr: avg_over_time(http_requests[5m])
```
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
reading data from one data source and backfilling results to another. This helps to build a system
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
allows reading data from one data source and backfilling results to another. This helps to build a system
for aggregating and downsampling the data.
The following example shows how to build a topology where `vmalert` will process data from one cluster
@@ -526,7 +547,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
#### Multiple remote writes
@@ -677,7 +698,7 @@ The default list of alerting rules for these metric can be found [here](https://
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
metrics may be analyzed later.
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert/) for `vmalert` overview.
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
If you have suggestions for improvements or have found a bug - please open an issue on github or add
a review to the dashboard.
@@ -694,10 +715,10 @@ may get empty response from datasource and produce empty recording rules or rese
<img alt="vmalert evaluation when data is delayed" src="vmalert_ts_data_delay.gif">
_By default recently written samples to VictoriaMetrics aren't visible for queries for up to 30s.
This behavior is controlled by `-search.latencyOffset` command-line flag on vmselect. Usually, this results into
a 30s shift for recording rules results.
Note that too small value passed to `-search.latencyOffset` may lead to incomplete query results._
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s.
This behavior is controlled by `-search.latencyOffset` command-line flag and the `latency_offset` query ag at `vmselect`.
Usually, this results into a 30s shift for recording rules results.
Note that too small value passed to `-search.latencyOffset` or to `latency_offest` query arg may lead to incomplete query results.
Try the following recommendations in such cases:
@@ -720,8 +741,9 @@ If `-remoteWrite.url` command-line flag is configured, vmalert will persist aler
[vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state
changed in time.
vmalert also stores last N state updates for each rule. To check updates, click on `Details` link next to rule's name
on `/vmalert/groups` page and check the `Last updates` section:
vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules))
state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page
and check the `Last updates` section:
<img alt="vmalert state" src="vmalert_state.png">
@@ -730,7 +752,7 @@ HTTP request sent by vmalert to the `-datasource.url` during evaluation. If spec
no samples returned and curl command returns data - then it is very likely there was no data in datasource on the
moment when rule was evaluated.
vmalert also alows configuring more detailed logging for specific rule. Just set `debug: true` in rule's configuration
vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration
and vmalert will start printing additional log messages:
```terminal
2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms)
@@ -835,7 +857,7 @@ The shortlist of configuration flags is the following:
-datasource.tlsServerName string
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
-datasource.url string
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also '-datasource.disablePathAppend', '-datasource.showURL'.
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL
-defaultTenant.graphite string
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy .This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-defaultTenant.prometheus string
@@ -882,13 +904,21 @@ The shortlist of configuration flags is the following:
-httpAuth.username string
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
Address to listen for http connections (default ":8880")
Address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8880")
-httpListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-insert.maxQueueDuration duration
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
-internStringMaxLen int
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -897,9 +927,11 @@ The shortlist of configuration flags is the following:
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentInserts int
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
@@ -954,7 +986,7 @@ The shortlist of configuration flags is the following:
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
Supports an array of values separated by comma or specified via multiple flags.
-notifier.url array
Prometheus alertmanager URL, e.g. http://127.0.0.1:9093
Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.
Supports an array of values separated by comma or specified via multiple flags.
-pprofAuthKey string
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
@@ -1050,6 +1082,8 @@ The shortlist of configuration flags is the following:
Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.
-remoteWrite.oauth2.tokenUrl string
Optional OAuth2 tokenURL to use for -notifier.url.
-remoteWrite.sendTimeout duration
Timeout for sending data to the configured -remoteWrite.url. (default 30s)
-remoteWrite.showURL
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-remoteWrite.tlsCAFile string
@@ -1099,6 +1133,8 @@ The shortlist of configuration flags is the following:
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
absolute path to all .tpl files in root.
Supports an array of values separated by comma or specified via multiple flags.
-rule.updateEntriesLimit int
Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overriden per rule via update_entries_limit param. (default 20)
-rule.validateExpressions
Whether to validate rules expressions via MetricsQL engine (default true)
-rule.validateTemplates
@@ -1182,6 +1218,8 @@ dns_sd_configs:
```
The list of configured or discovered Notifiers can be explored via [UI](#Web).
If Alertmanager runs in cluster mode then all its URLs needs to be available during discovery
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
The configuration file [specification](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/config.go)
is the following:
@@ -1315,7 +1353,7 @@ spec:
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmalert` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmalert` binary and puts it into the `bin` folder.
@@ -1331,7 +1369,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmalert-linux-arm` or `make vmalert-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmalert-linux-arm` or `vmalert-linux-arm64` binary respectively and puts it into the `bin` folder.

View File

@@ -74,10 +74,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
Debug: cfg.Debug,
}),
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(),
metrics: &alertingRuleMetrics{},
}
if cfg.UpdateEntriesLimit != nil {
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
} else {
ar.state = newRuleState(*ruleUpdateEntriesLimit)
}
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
func() float64 {
@@ -284,7 +289,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
duration: time.Since(start),
samples: len(qMetrics),
err: err,
req: req,
curl: requestToCurl(req),
}
defer func() {
@@ -456,6 +461,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
Value: m.Values[0],
ActiveAt: start,
Expr: ar.Expr,
For: ar.For,
}
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
return a, err
@@ -490,6 +496,7 @@ func (ar *AlertingRule) ToAPI() APIRule {
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.samples,
MaxUpdates: ar.state.size(),
Updates: ar.state.getAll(),
// encode as strings to avoid rounding in JSON

View File

@@ -709,7 +709,6 @@ func TestAlertingRule_Template(t *testing.T) {
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(),
},
[]datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"),
@@ -749,7 +748,6 @@ func TestAlertingRule_Template(t *testing.T) {
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(),
},
[]datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
@@ -789,7 +787,6 @@ func TestAlertingRule_Template(t *testing.T) {
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
},
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(),
},
[]datasource.Metric{
metricWithValueAndLabels(t, 1,
@@ -820,6 +817,7 @@ func TestAlertingRule_Template(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.GroupID = fakeGroup.ID()
tc.rule.q = fq
tc.rule.state = newRuleState(10)
fq.add(tc.metrics...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected err: %s", err)
@@ -936,6 +934,6 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
For: waitFor,
EvalInterval: waitFor,
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(),
state: newRuleState(10),
}
}

View File

@@ -114,6 +114,9 @@ type Rule struct {
Labels map[string]string `yaml:"labels,omitempty"`
Annotations map[string]string `yaml:"annotations,omitempty"`
Debug bool `yaml:"debug,omitempty"`
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
// Overrides `-rule.updateEntriesLimit`.
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
// Catches all undefined fields and must be empty after parsing.
XXX map[string]interface{} `yaml:",inline"`

View File

@@ -550,6 +550,20 @@ rules:
- alert: foo
expr: sum by(job) (up == 1)
debug: true
`)
})
t.Run("`update_entries_limit` change", func(t *testing.T) {
f(t, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
`, `
name: TestGroup
rules:
- alert: foo
expr: sum by(job) (up == 1)
update_entries_limit: 33
`)
})
}

View File

@@ -12,6 +12,7 @@ groups:
expr: vm_tcplistener_conns > 0
for: 3m
debug: true
update_entries_limit: 40
annotations:
labels: "Available labels: {{ $labels }}"
summary: Too high connection number for {{ $labels.instance }}
@@ -20,6 +21,7 @@ groups:
{{ end }}
description: "It is {{ $value }} connections for {{$labels.instance}}"
- alert: ExampleAlertAlwaysFiring
update_entries_limit: -1
expr: sum by(job)
(up == 1)
labels:

View File

@@ -7,6 +7,7 @@ groups:
- alert: Conns
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
for: 3m
annotations:
summary: Too high connection number for {{$labels.instance}}
description: "It is {{ $value }} connections for {{$labels.instance}}"

View File

@@ -54,6 +54,19 @@ func (m *Metric) SetLabel(key, value string) {
m.AddLabel(key, value)
}
// SetLabels sets the given map as Metric labels
func (m *Metric) SetLabels(ls map[string]string) {
var i int
m.Labels = make([]Label, len(ls))
for k, v := range ls {
m.Labels[i] = Label{
Name: k,
Value: v,
}
i++
}
}
// AddLabel appends the given label to the label set
func (m *Metric) AddLabel(key, value string) {
m.Labels = append(m.Labels, Label{Name: key, Value: value})

View File

@@ -14,7 +14,7 @@ import (
var (
addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. "+
"E.g. http://127.0.0.1:8428 . See also '-datasource.disablePathAppend', '-datasource.showURL'.")
"E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL")
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to show -datasource.url in the exported metrics. "+
"It is hidden by default, since it can contain sensitive info such as auth key")

View File

@@ -32,19 +32,17 @@ type promInstant struct {
}
func (r promInstant) metrics() ([]Metric, error) {
var result []Metric
result := make([]Metric, len(r.Result))
for i, res := range r.Result {
f, err := strconv.ParseFloat(res.TV[1].(string), 64)
if err != nil {
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
}
var m Metric
for k, v := range r.Result[i].Labels {
m.AddLabel(k, v)
}
m.SetLabels(res.Labels)
m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
m.Values = append(m.Values, f)
result = append(result, m)
result[i] = m
}
return result, nil
}
@@ -149,6 +147,16 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
timestamp = timestamp.Truncate(s.evaluationInterval)
}
q.Set("time", fmt.Sprintf("%d", timestamp.Unix()))
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
}
if s.queryStep > 0 { // override step with user-specified value
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
}
@@ -163,6 +171,11 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
q := r.URL.Query()
q.Add("start", fmt.Sprintf("%d", start.Unix()))
q.Add("end", fmt.Sprintf("%d", end.Unix()))
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
}
@@ -178,15 +191,5 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
}
}
q.Set("query", query)
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
}
if s.queryStep > 0 { // override step with user-specified value
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
}
r.URL.RawQuery = q.Encode()
}

View File

@@ -0,0 +1,20 @@
package datasource
import (
"encoding/json"
"testing"
)
func BenchmarkMetrics(b *testing.B) {
payload := []byte(`[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests", "foo":"bar", "baz": "qux"},"value":[1583786140,"2000"]}]`)
var pi promInstant
if err := json.Unmarshal(payload, &pi.Result); err != nil {
b.Fatalf(err.Error())
}
b.Run("Instant", func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _ = pi.metrics()
}
})
}

View File

@@ -7,6 +7,7 @@ import (
"net/http/httptest"
"net/url"
"reflect"
"sort"
"strconv"
"strings"
"testing"
@@ -74,7 +75,7 @@ func TestVMInstantQuery(t *testing.T) {
case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests"},"value":[1583786140,"2000"]}]}}`))
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
}
@@ -115,19 +116,17 @@ func TestVMInstantQuery(t *testing.T) {
}
expected := []Metric{
{
Labels: []Label{{Value: "vm_rows", Name: "__name__"}},
Labels: []Label{{Value: "vm_rows", Name: "__name__"}, {Value: "bar", Name: "foo"}},
Timestamps: []int64{1583786142},
Values: []float64{13763},
},
{
Labels: []Label{{Value: "vm_requests", Name: "__name__"}},
Labels: []Label{{Value: "vm_requests", Name: "__name__"}, {Value: "baz", Name: "foo"}},
Timestamps: []int64{1583786140},
Values: []float64{2000},
},
}
if !reflect.DeepEqual(m, expected) {
t.Fatalf("unexpected metric %+v want %+v", m, expected)
}
metricsEqual(t, m, expected)
m, req, err := pq.Query(ctx, query, ts) // 7 - scalar
if err != nil {
@@ -158,13 +157,36 @@ func TestVMInstantQuery(t *testing.T) {
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
exp := Metric{
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
Timestamps: []int64{1611758403},
Values: []float64{10},
exp := []Metric{
{
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
Timestamps: []int64{1611758403},
Values: []float64{10},
},
}
if !reflect.DeepEqual(m[0], exp) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
metricsEqual(t, m, exp)
}
func metricsEqual(t *testing.T, gotM, expectedM []Metric) {
for i, exp := range expectedM {
got := gotM[i]
gotTS, expTS := got.Timestamps, exp.Timestamps
if !reflect.DeepEqual(gotTS, expTS) {
t.Fatalf("unexpected timestamps %+v want %+v", gotTS, expTS)
}
gotV, expV := got.Values, exp.Values
if !reflect.DeepEqual(gotV, expV) {
t.Fatalf("unexpected values %+v want %+v", gotV, expV)
}
sort.Slice(got.Labels, func(i, j int) bool {
return got.Labels[i].Name < got.Labels[j].Name
})
sort.Slice(exp.Labels, func(i, j int) bool {
return exp.Labels[i].Name < exp.Labels[j].Name
})
if !reflect.DeepEqual(exp.Labels, got.Labels) {
t.Fatalf("unexpected labels %+v want %+v", got.Labels, exp.Labels)
}
}
}
@@ -199,6 +221,10 @@ func TestVMRangeQuery(t *testing.T) {
if _, err := strconv.ParseInt(endTS, 10, 64); err != nil {
t.Errorf("failed to parse 'end' query param: %s", err)
}
step := r.URL.Query().Get("step")
if step != "15s" {
t.Errorf("expected 'step' query param to be 15s; got %q instead", step)
}
switch c {
case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
@@ -212,7 +238,7 @@ func TestVMRangeQuery(t *testing.T) {
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client())
s := NewVMStorage(srv.URL, authCfg, time.Minute, *queryStep, false, srv.Client())
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second})

View File

@@ -168,9 +168,12 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, lookb
if rr.For < 1 {
continue
}
// ignore g.ExtraFilterLabels on purpose, so it
// won't affect the restore procedure.
q := qb.BuildWithParams(datasource.QuerierParams{})
// ignore QueryParams on purpose, because they could contain
// query filters. This may affect the restore procedure.
q := qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: g.Type.String(),
Headers: g.Headers,
})
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
}
@@ -418,20 +421,26 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
}
errGr := new(utils.ErrGroup)
if e.rw != nil {
pushToRW := func(tss []prompbmarshal.TimeSeries) {
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
var lastErr error
for _, ts := range tss {
remoteWriteTotal.Inc()
if err := e.rw.Push(ts); err != nil {
remoteWriteErrors.Inc()
errGr.Add(fmt.Errorf("rule %q: remote write failure: %w", rule, err))
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
}
}
return lastErr
}
pushToRW(tss)
if err := pushToRW(tss); err != nil {
return err
}
staleSeries := e.getStaleSeries(rule, tss, ts)
pushToRW(staleSeries)
if err := pushToRW(staleSeries); err != nil {
return err
}
}
ar, ok := rule.(*AlertingRule)
@@ -445,6 +454,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
}
wg := sync.WaitGroup{}
errGr := new(utils.ErrGroup)
for _, nt := range e.notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {

View File

@@ -3,8 +3,6 @@ package main
import (
"context"
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"reflect"
"sort"
"testing"
@@ -12,6 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@@ -452,3 +453,24 @@ func TestFaultyNotifier(t *testing.T) {
}
t.Fatalf("alive notifier didn't receive notification by %v", deadline)
}
func TestFaultyRW(t *testing.T) {
fq := &fakeQuerier{}
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
r := &RecordingRule{
Name: "test",
state: newRuleState(10),
q: fq,
}
e := &executor{
rw: &remotewrite.Client{},
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
err := e.exec(context.Background(), r, time.Now(), 0, 10)
if err == nil {
t.Fatalf("expected to get an error from faulty RW client, got nil instead")
}
}

View File

@@ -49,14 +49,18 @@ absolute path to all .tpl files in root.`)
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
"By default the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
"which is by default equal to 3 evaluation intervals of the parent group.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overriden per rule via update_entries_limit param.")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
@@ -168,7 +172,7 @@ func main() {
go configReload(ctx, manager, groupsCfg, sighupCh)
rh := &requestHandler{m: manager}
go httpserver.Serve(*httpListenAddr, rh.handler)
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, rh.handler)
sig := procutil.WaitForSigterm()
logger.Infof("service received signal %s", sig)

View File

@@ -64,17 +64,18 @@ func TestManagerUpdateConcurrent(t *testing.T) {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
rnd := rand.Intn(len(paths))
rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected
continue
}
_ = m.update(context.Background(), cfg, false)
}
}()
}(i)
}
wg.Wait()
}

View File

@@ -45,6 +45,8 @@ type Alert struct {
ID uint64
// Restored is true if Alert was restored after restart
Restored bool
// For defines for how long Alert needs to be active to become StateFiring
For time.Duration
}
// AlertState type indicates the Alert state
@@ -80,6 +82,7 @@ type AlertTplData struct {
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
}
var tplHeaders = []string{
@@ -91,6 +94,7 @@ var tplHeaders = []string{
"{{ $alertID := .AlertID }}",
"{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}",
}
// ExecTemplate executes the Alert template for given
@@ -98,7 +102,15 @@ var tplHeaders = []string{
// Every alert could have a different datasource, so function
// requires a queryFunction as an argument.
func (a *Alert) ExecTemplate(q templates.QueryFn, labels, annotations map[string]string) (map[string]string, error) {
tplData := AlertTplData{Value: a.Value, Labels: labels, Expr: a.Expr, AlertID: a.ID, GroupID: a.GroupID, ActiveAt: a.ActiveAt}
tplData := AlertTplData{
Value: a.Value,
Labels: labels,
Expr: a.Expr,
AlertID: a.ID,
GroupID: a.GroupID,
ActiveAt: a.ActiveAt,
For: a.For,
}
tmpl, err := templates.GetWithFuncs(templates.FuncsWithQuery(q))
if err != nil {
return nil, fmt.Errorf("error getting a template: %w", err)

View File

@@ -54,14 +54,15 @@ func TestAlert_ExecTemplate(t *testing.T) {
"job": "staging",
"instance": "localhost",
},
For: 5 * time.Minute,
},
annotations: map[string]string{
"summary": "Too high connection number for {{$labels.instance}} for job {{$labels.job}}",
"description": "It is {{ $value }} connections for {{$labels.instance}}",
"description": "It is {{ $value }} connections for {{$labels.instance}} for more than {{ .For }}",
},
expTpl: map[string]string{
"summary": "Too high connection number for localhost for job staging",
"description": "It is 10000 connections for localhost",
"description": "It is 10000 connections for localhost for more than 5m0s",
},
},
{
@@ -152,7 +153,7 @@ func TestAlert_ExecTemplate(t *testing.T) {
},
},
{
name: "ActiveAt custome format",
name: "ActiveAt custom format",
alert: &Alert{
ActiveAt: time.Date(2022, 8, 19, 20, 34, 58, 651387237, time.UTC),
},
@@ -239,7 +240,7 @@ func TestAlert_toPromLabels(t *testing.T) {
replacement: "aaa"
- action: labeldrop
regex: "env.*"
`), false)
`))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}

View File

@@ -12,7 +12,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
@@ -84,12 +83,12 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
if cfg.Timeout.Duration() == 0 {
cfg.Timeout = promutils.NewDuration(time.Second * 10)
}
rCfg, err := promrelabel.ParseRelabelConfigs(cfg.RelabelConfigs, false)
rCfg, err := promrelabel.ParseRelabelConfigs(cfg.RelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse relabeling config: %w", err)
}
cfg.parsedRelabelConfigs = rCfg
arCfg, err := promrelabel.ParseRelabelConfigs(cfg.AlertRelabelConfigs, false)
arCfg, err := promrelabel.ParseRelabelConfigs(cfg.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert relabeling config: %w", err)
}
@@ -130,24 +129,24 @@ func parseConfig(path string) (*Config, error) {
return cfg, nil
}
func parseLabels(target string, metaLabels map[string]string, cfg *Config) (string, []prompbmarshal.Label, error) {
func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (string, *promutils.Labels, error) {
labels := mergeLabels(target, metaLabels, cfg)
labels = cfg.parsedRelabelConfigs.Apply(labels, 0)
labels = promrelabel.RemoveMetaLabels(labels[:0], labels)
promrelabel.SortLabels(labels)
labels.Labels = cfg.parsedRelabelConfigs.Apply(labels.Labels, 0)
labels.RemoveMetaLabels()
labels.Sort()
// Remove references to already deleted labels, so GC could clean strings for label name and label value past len(labels).
// This should reduce memory usage when relabeling creates big number of temporary labels with long names and/or values.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825 for details.
labels = append([]prompbmarshal.Label{}, labels...)
labels = labels.Clone()
if len(labels) == 0 {
if labels.Len() == 0 {
return "", nil, nil
}
schemeRelabeled := promrelabel.GetLabelValueByName(labels, "__scheme__")
schemeRelabeled := labels.Get("__scheme__")
if len(schemeRelabeled) == 0 {
schemeRelabeled = "http"
}
addressRelabeled := promrelabel.GetLabelValueByName(labels, "__address__")
addressRelabeled := labels.Get("__address__")
if len(addressRelabeled) == 0 {
return "", nil, nil
}
@@ -155,7 +154,7 @@ func parseLabels(target string, metaLabels map[string]string, cfg *Config) (stri
return "", nil, nil
}
addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
alertsPathRelabeled := promrelabel.GetLabelValueByName(labels, "__alerts_path__")
alertsPathRelabeled := labels.Get("__alerts_path__")
if !strings.HasPrefix(alertsPathRelabeled, "/") {
alertsPathRelabeled = "/" + alertsPathRelabeled
}
@@ -179,21 +178,12 @@ func addMissingPort(scheme, target string) string {
return target
}
func mergeLabels(target string, metaLabels map[string]string, cfg *Config) []prompbmarshal.Label {
func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
m := make(map[string]string)
m["__address__"] = target
m["__scheme__"] = cfg.Scheme
m["__alerts_path__"] = path.Join("/", cfg.PathPrefix, alertManagerPath)
for k, v := range metaLabels {
m[k] = v
}
result := make([]prompbmarshal.Label, 0, len(m))
for k, v := range m {
result = append(result, prompbmarshal.Label{
Name: k,
Value: v,
})
}
return result
m := promutils.NewLabels(3 + metaLabels.Len())
m.Add("__address__", target)
m.Add("__scheme__", cfg.Scheme)
m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
m.AddFrom(metaLabels)
return m
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
// configWatcher supports dynamic reload of Notifier objects
@@ -123,7 +124,7 @@ func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator)
var errors []error
duplicates := make(map[string]struct{})
for _, labels := range metaLabels {
target := labels["__address__"]
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
@@ -156,7 +157,7 @@ func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator)
return targets, errors
}
type getLabels func() ([]map[string]string, error)
type getLabels func() ([]*promutils.Labels, error)
func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 {
@@ -182,8 +183,8 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]map[string]string, error) {
var labels []map[string]string
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutils.Labels, error) {
var labels []*promutils.Labels
for i := range cw.cfg.ConsulSDConfigs {
sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
@@ -200,8 +201,8 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]map[string]string, error) {
var labels []map[string]string
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutils.Labels, error) {
var labels []*promutils.Labels
for i := range cw.cfg.DNSSDConfigs {
sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)

View File

@@ -162,14 +162,15 @@ consul_sd_configs:
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
rnd := rand.Intn(len(paths))
rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers()
}
}()
}(i)
}
wg.Wait()
}

View File

@@ -10,14 +10,15 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
var (
configPath = flag.String("notifier.config", "", "Path to configuration file for notifiers")
suppressDuplicateTargetErrors = flag.Bool("notifier.suppressDuplicateTargetErrors", false, "Whether to suppress 'duplicate target' errors during discovery")
addrs = flagutil.NewArrayString("notifier.url", "Prometheus alertmanager URL, e.g. http://127.0.0.1:9093")
addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
@@ -159,7 +160,7 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
// list of labels added during discovery.
type Target struct {
Notifier
Labels []prompbmarshal.Label
Labels *promutils.Labels
}
// TargetType defines how the Target was discovered

View File

@@ -58,7 +58,6 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
Labels: cfg.Labels,
GroupID: group.ID(),
metrics: &recordingRuleMetrics{},
state: newRuleState(),
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
EvaluationInterval: group.Interval,
@@ -67,6 +66,12 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
}),
}
if cfg.UpdateEntriesLimit != nil {
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
} else {
rr.state = newRuleState(*ruleUpdateEntriesLimit)
}
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
func() float64 {
@@ -121,7 +126,7 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
at: ts,
duration: time.Since(start),
samples: len(qMetrics),
req: req,
curl: requestToCurl(req),
}
defer func() {
@@ -212,6 +217,7 @@ func (rr *RecordingRule) ToAPI() APIRule {
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
LastSamples: lastState.samples,
MaxUpdates: rr.state.size(),
Updates: rr.state.getAll(),
// encode as strings to avoid rounding

View File

@@ -19,7 +19,7 @@ func TestRecordingRule_Exec(t *testing.T) {
expTS []prompbmarshal.TimeSeries
}{
{
&RecordingRule{Name: "foo", state: newRuleState()},
&RecordingRule{Name: "foo"},
[]datasource.Metric{metricWithValueAndLabels(t, 10,
"__name__", "bar",
)},
@@ -30,7 +30,7 @@ func TestRecordingRule_Exec(t *testing.T) {
},
},
{
&RecordingRule{Name: "foobarbaz", state: newRuleState()},
&RecordingRule{Name: "foobarbaz"},
[]datasource.Metric{
metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
@@ -53,8 +53,7 @@ func TestRecordingRule_Exec(t *testing.T) {
},
{
&RecordingRule{
Name: "job:foo",
state: newRuleState(),
Name: "job:foo",
Labels: map[string]string{
"source": "test",
}},
@@ -80,6 +79,7 @@ func TestRecordingRule_Exec(t *testing.T) {
fq := &fakeQuerier{}
fq.add(tc.metrics...)
tc.rule.q = fq
tc.rule.state = newRuleState(10)
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
if err != nil {
t.Fatalf("unexpected Exec err: %s", err)
@@ -198,7 +198,7 @@ func TestRecordingRuleLimit(t *testing.T) {
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
}
rule := &RecordingRule{Name: "job:foo", state: newRuleState(), Labels: map[string]string{
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
"source": "test_limit",
}}
var err error
@@ -216,7 +216,7 @@ func TestRecordingRuleLimit(t *testing.T) {
func TestRecordingRule_ExecNegative(t *testing.T) {
rr := &RecordingRule{
Name: "job:foo",
state: newRuleState(),
state: newRuleState(10),
Labels: map[string]string{
"job": "test",
},

View File

@@ -77,13 +77,12 @@ func Init(ctx context.Context) (*Client, error) {
}
return NewClient(ctx, Config{
Addr: *addr,
AuthCfg: authCfg,
Concurrency: *concurrency,
MaxQueueSize: *maxQueueSize,
MaxBatchSize: *maxBatchSize,
FlushInterval: *flushInterval,
DisablePathAppend: *disablePathAppend,
Transport: t,
Addr: *addr,
AuthCfg: authCfg,
Concurrency: *concurrency,
MaxQueueSize: *maxQueueSize,
MaxBatchSize: *maxBatchSize,
FlushInterval: *flushInterval,
Transport: t,
})
}

View File

@@ -22,6 +22,7 @@ import (
var (
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
)
// Client is an asynchronous HTTP client for writing
@@ -57,13 +58,8 @@ type Config struct {
MaxQueueSize int
// FlushInterval defines time interval for flushing batches
FlushInterval time.Duration
// WriteTimeout defines timeout for HTTP write request
// to remote storage
WriteTimeout time.Duration
// Transport will be used by the underlying http.Client
Transport *http.Transport
// DisablePathAppend can be used to not automatically append '/api/v1/write' to the remote write url
DisablePathAppend bool
}
const (
@@ -89,9 +85,6 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.FlushInterval == 0 {
cfg.FlushInterval = defaultFlushInterval
}
if cfg.WriteTimeout == 0 {
cfg.WriteTimeout = defaultWriteTimeout
}
if cfg.Transport == nil {
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
}
@@ -101,7 +94,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
}
c := &Client{
c: &http.Client{
Timeout: cfg.WriteTimeout,
Timeout: *sendTimeout,
Transport: cfg.Transport,
},
addr: strings.TrimSuffix(cfg.Addr, "/"),

View File

@@ -27,12 +27,13 @@ func TestClient_Push(t *testing.T) {
if err != nil {
t.Fatalf("failed to create client: %s", err)
}
r := rand.New(rand.NewSource(1))
const rowsN = 1e4
var sent int
for i := 0; i < rowsN; i++ {
s := prompbmarshal.TimeSeries{
Samples: []prompbmarshal.Sample{{
Value: rand.Float64(),
Value: r.Float64(),
Timestamp: time.Now().Unix(),
}},
}

View File

@@ -3,7 +3,6 @@ package main
import (
"context"
"errors"
"net/http"
"sync"
"time"
@@ -38,6 +37,8 @@ type ruleState struct {
sync.RWMutex
entries []ruleStateEntry
cur int
// disabled defines whether ruleState tracks ruleStateEntry
disabled bool
}
type ruleStateEntry struct {
@@ -54,25 +55,40 @@ type ruleStateEntry struct {
// stores the number of samples returned during
// the last evaluation
samples int
// stores the HTTP request used by datasource during rule.Exec
req *http.Request
// stores the curl command reflecting the HTTP request used during rule.Exec
curl string
}
const defaultStateEntriesLimit = 20
func newRuleState() *ruleState {
func newRuleState(size int) *ruleState {
if size < 1 {
return &ruleState{disabled: true}
}
return &ruleState{
entries: make([]ruleStateEntry, defaultStateEntriesLimit),
entries: make([]ruleStateEntry, size),
}
}
func (s *ruleState) getLast() ruleStateEntry {
if s.disabled {
return ruleStateEntry{}
}
s.RLock()
defer s.RUnlock()
return s.entries[s.cur]
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()
return len(s.entries)
}
func (s *ruleState) getAll() []ruleStateEntry {
if s.disabled {
return nil
}
entries := make([]ruleStateEntry, 0)
s.RLock()
@@ -95,6 +111,10 @@ func (s *ruleState) getAll() []ruleStateEntry {
}
func (s *ruleState) add(e ruleStateEntry) {
if s.disabled {
return
}
s.Lock()
defer s.Unlock()

View File

@@ -6,8 +6,27 @@ import (
"time"
)
func TestRule_stateDisabled(t *testing.T) {
state := newRuleState(-1)
e := state.getLast()
if !e.at.IsZero() {
t.Fatalf("expected entry to be zero")
}
state.add(ruleStateEntry{at: time.Now()})
if !e.at.IsZero() {
t.Fatalf("expected entry to be zero")
}
if len(state.getAll()) != 0 {
t.Fatalf("expected for state to have %d entries; got %d",
0, len(state.getAll()),
)
}
}
func TestRule_state(t *testing.T) {
state := newRuleState()
stateEntriesN := 20
state := newRuleState(stateEntriesN)
e := state.getLast()
if !e.at.IsZero() {
t.Fatalf("expected entry to be zero")
@@ -39,7 +58,7 @@ func TestRule_state(t *testing.T) {
}
var last time.Time
for i := 0; i < defaultStateEntriesLimit*2; i++ {
for i := 0; i < stateEntriesN*2; i++ {
last = time.Now()
state.add(ruleStateEntry{at: last})
}
@@ -50,9 +69,9 @@ func TestRule_state(t *testing.T) {
e.at, last)
}
if len(state.getAll()) != defaultStateEntriesLimit {
if len(state.getAll()) != stateEntriesN {
t.Fatalf("expected for state to have %d entries only; got %d",
defaultStateEntriesLimit, len(state.getAll()),
stateEntriesN, len(state.getAll()),
)
}
}
@@ -61,7 +80,7 @@ func TestRule_state(t *testing.T) {
// execution of state updates.
// Should be executed with -race flag
func TestRule_stateConcurrent(t *testing.T) {
state := newRuleState()
state := newRuleState(20)
const workers = 50
const iterations = 100

View File

@@ -27,11 +27,11 @@ import (
"strconv"
"strings"
"sync"
textTpl "text/template"
"time"
textTpl "text/template"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/formatutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@@ -350,18 +350,10 @@ func templateFuncs() textTpl.FuncMap {
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v), nil
}
prefix := ""
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
if math.Abs(v) < 1024 {
break
}
prefix = p
v /= 1024
}
return fmt.Sprintf("%.4g%s", v, prefix), nil
return formatutil.HumanizeBytes(v), nil
},
// humanizeDuration converts given seconds to a human readable duration
// humanizeDuration converts given seconds to a human-readable duration
"humanizeDuration": func(i interface{}) (string, error) {
v, err := toFloat64(i)
if err != nil {

View File

@@ -1,6 +1,7 @@
package templates
import (
"math"
"strings"
"testing"
textTpl "text/template"
@@ -50,6 +51,31 @@ func TestTemplateFuncs(t *testing.T) {
if !ok {
t.Fatalf("unexpected mismatch")
}
formatting := func(funcName string, p interface{}, resultExpected string) {
t.Helper()
v := funcs[funcName]
fLocal := v.(func(s interface{}) (string, error))
result, err := fLocal(p)
if err != nil {
t.Fatalf("unexpected error for %s(%f): %s", funcName, p, err)
}
if result != resultExpected {
t.Fatalf("unexpected result for %s(%f); got\n%s\nwant\n%s", funcName, p, result, resultExpected)
}
}
formatting("humanize1024", float64(0), "0")
formatting("humanize1024", math.Inf(0), "+Inf")
formatting("humanize1024", math.NaN(), "NaN")
formatting("humanize1024", float64(127087), "124.1ki")
formatting("humanize1024", float64(130137088), "124.1Mi")
formatting("humanize1024", float64(133260378112), "124.1Gi")
formatting("humanize1024", float64(136458627186688), "124.1Ti")
formatting("humanize1024", float64(139733634239168512), "124.1Pi")
formatting("humanize1024", float64(143087241460908556288), "124.1Ei")
formatting("humanize1024", float64(146521335255970361638912), "124.1Zi")
formatting("humanize1024", float64(150037847302113650318245888), "124.1Yi")
formatting("humanize1024", float64(153638755637364377925883789312), "1.271e+05Yi")
}
func mkTemplate(current, replacement interface{}) textTemplate {

View File

@@ -72,7 +72,7 @@ func (cw *curlWriter) add(str string) {
}
func requestToCurl(req *http.Request) string {
if req.URL == nil {
if req == nil || req.URL == nil {
return ""
}

View File

@@ -8,7 +8,6 @@ import (
"sort"
"strconv"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
@@ -18,20 +17,14 @@ import (
)
var (
once = sync.Once{}
apiLinks [][2]string
navItems []tpl.NavItem
)
func initLinks() {
apiLinks = [][2]string{
// api links are relative since they can be used by external clients,
// such as Grafana, and proxied via vmselect.
{"api/v1/rules", "list all loaded groups and rules"},
{"api/v1/alerts", "list all active alerts"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
// system links
}
systemLinks = [][2]string{
{"/flags", "command-line flags"},
{"/metrics", "list of application metrics"},
{"/-/reload", "reload configuration"},
@@ -43,7 +36,7 @@ func initLinks() {
{Name: "Notifiers", Url: "notifiers"},
{Name: "Docs", Url: "https://docs.victoriametrics.com/vmalert.html"},
}
}
)
type requestHandler struct {
m *manager
@@ -57,10 +50,6 @@ var (
)
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
once.Do(func() {
initLinks()
})
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
staticServer.ServeHTTP(w, r)
return true

View File

@@ -16,11 +16,16 @@
<p>
API:<br>
{% for _, p := range apiLinks %}
{%code
p, doc := p[0], p[1]
%}
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
{%code p, doc := p[0], p[1] %}
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
{% endfor %}
{% if r.Header.Get("X-Forwarded-For") == "" %}
System:<br>
{% for _, p := range systemLinks %}
{%code p, doc := p[0], p[1] %}
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
{% endfor %}
{% endif %}
</p>
{%= tpl.Footer(r) %}
{% endfunc %}
@@ -248,7 +253,7 @@
{% for _, n := range ns %}
<tr>
<td>
{% for _, l := range n.Labels %}
{% for _, l := range n.Labels.GetLabels() %}
<span class="ms-1 badge bg-primary">{%s l.Name %}={%s l.Value %}</span>
{% endfor %}
</td>
@@ -435,7 +440,7 @@
</div>
<br>
<div class="display-6 pb-3">Last {%d len(rule.Updates) %} updates</span>:</div>
<div class="display-6 pb-3">Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates</span>:</div>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
@@ -457,7 +462,7 @@
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
<td>
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s requestToCurl(u.req) %}</textarea>
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.curl %}</textarea>
</td>
</tr>
</li>

File diff suppressed because it is too large Load Diff

View File

@@ -17,7 +17,7 @@ func TestHandler(t *testing.T) {
alerts: map[uint64]*notifier.Alert{
0: {State: notifier.StateFiring},
},
state: newRuleState(),
state: newRuleState(10),
}
g := &Group{
Name: "group",

View File

@@ -121,6 +121,8 @@ type APIRule struct {
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// MaxUpdates is the max number of recorded ruleStateEntry objects
MaxUpdates int `json:"max_updates_entries"`
// Updates contains the ordered list of recorded ruleStateEntry objects
Updates []ruleStateEntry `json:"updates"`
}

View File

@@ -167,7 +167,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmauth` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmauth` binary and puts it into the `bin` folder.
@@ -261,7 +261,11 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
-httpAuth.username string
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
TCP address to listen for http connections (default ":8427")
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8427")
-httpListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-internStringMaxLen int
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
-logInvalidAuthTokens
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
-loggerDisableTimestamps
@@ -270,6 +274,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -278,11 +284,13 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentRequests int
The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxIdleConnsPerBackend (default 1000)
-maxIdleConnsPerBackend int
The maximum number of idle connections vmauth can open per each backend host (default 100)
The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
@@ -299,6 +307,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
Supports an array of values separated by comma or specified via multiple flags.
-reloadAuthKey string
Auth key for /-/reload http endpoint. It must be passed as authKey=...
-responseTimeout duration
The timeout for receiving a response from backend (default 5m0s)
-tls
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
-tlsCertFile string

View File

@@ -37,7 +37,7 @@ type UserInfo struct {
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
URLMap []URLMap `yaml:"url_map,omitempty"`
URLMaps []URLMap `yaml:"url_map,omitempty"`
Headers []Header `yaml:"headers,omitempty"`
requests *metrics.Counter
@@ -284,7 +284,7 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
return nil, err
}
}
for _, e := range ui.URLMap {
for _, e := range ui.URLMaps {
if len(e.SrcPaths) == 0 {
return nil, fmt.Errorf("missing `src_paths` in `url_map`")
}
@@ -295,7 +295,7 @@ func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
return nil, err
}
}
if len(ui.URLMap) == 0 && ui.URLPrefix == nil {
if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
return nil, fmt.Errorf("missing `url_prefix`")
}
if ui.BearerToken != "" {

View File

@@ -278,7 +278,7 @@ users:
`, map[string]*UserInfo{
getAuthToken("foo", "", ""): {
BearerToken: "foo",
URLMap: []URLMap{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),
@@ -304,7 +304,7 @@ users:
},
getAuthToken("", "foo", ""): {
BearerToken: "foo",
URLMap: []URLMap{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/api/v1/query", "/api/v1/query_range", "/api/v1/label/[^./]+/.+"}),
URLPrefix: mustParseURL("http://vmselect/select/0/prometheus"),

View File

@@ -3,8 +3,10 @@ package main
import (
"flag"
"fmt"
"io"
"net"
"net/http"
"net/http/httputil"
"net/textproto"
"net/url"
"os"
"strings"
@@ -12,20 +14,28 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
"See also -maxConcurrentRequests")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
"'429 Too Many Requests' http status code. See also -maxIdleConnsPerBackend")
reloadAuthKey = flag.String("reloadAuthKey", "", "Auth key for /-/reload http endpoint. It must be passed as authKey=...")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
)
@@ -41,7 +51,7 @@ func main() {
logger.Infof("starting vmauth at %q...", *httpListenAddr)
startTime := time.Now()
initAuthConfig()
go httpserver.Serve(*httpListenAddr, requestHandler)
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
logger.Infof("started vmauth in %.3f seconds", time.Since(startTime).Seconds())
sig := procutil.WaitForSigterm()
@@ -60,9 +70,7 @@ func main() {
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
switch r.URL.Path {
case "/-/reload":
authKey := r.FormValue("authKey")
if authKey != *reloadAuthKey {
httpserver.Errorf(w, r, "invalid authKey %q. It must match the value from -reloadAuthKey command line flag", authKey)
if !httpserver.CheckAuthFlag(w, r, *reloadAuthKey, "reloadAuthKey") {
return true
}
configReloadRequests.Inc()
@@ -81,15 +89,20 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
// See https://docs.influxdata.com/influxdb/v2.0/api/
authToken = strings.Replace(authToken, "Token", "Bearer", 1)
}
ac := authConfig.Load().(map[string]*UserInfo)
ui := ac[authToken]
if ui == nil {
invalidAuthTokenRequests.Inc()
err := fmt.Errorf("cannot find the provided auth token %q in config", authToken)
if *logInvalidAuthTokens {
httpserver.Errorf(w, r, "cannot find the provided auth token %q in config", authToken)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
} else {
errStr := fmt.Sprintf("cannot find the provided auth token %q in config", authToken)
http.Error(w, errStr, http.StatusBadRequest)
http.Error(w, err.Error(), http.StatusUnauthorized)
}
return true
}
@@ -99,26 +112,121 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
return true
}
r.Header.Set("vm-target-url", targetURL.String())
for _, h := range headers {
r.Header.Set(h.Name, h.Value)
// Limit the concurrency of requests to backends
concurrencyLimitOnce.Do(concurrencyLimitInit)
select {
case concurrencyLimitCh <- struct{}{}:
default:
concurrentRequestsLimitReachedTotal.Inc()
w.Header().Add("Retry-After", "10")
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh)),
StatusCode: http.StatusTooManyRequests,
}
httpserver.Errorf(w, r, "%s", err)
return true
}
proxyRequest(w, r)
processRequest(w, r, targetURL, headers)
<-concurrencyLimitCh
return true
}
func proxyRequest(w http.ResponseWriter, r *http.Request) {
defer func() {
err := recover()
if err == nil || err == http.ErrAbortHandler {
// Suppress http.ErrAbortHandler panic.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1353
return
func processRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, headers []Header) {
// This code has been copied from net/http/httputil/reverseproxy.go
req := sanitizeRequestHeaders(r)
req.URL = targetURL
for _, h := range headers {
req.Header.Set(h.Name, h.Value)
}
transportOnce.Do(transportInit)
res, err := transport.RoundTrip(req)
if err != nil {
err = &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("error when proxying the request to %q: %s", targetURL, err),
StatusCode: http.StatusBadGateway,
}
// Forward other panics to the caller.
panic(err)
}()
getReverseProxy().ServeHTTP(w, r)
httpserver.Errorf(w, r, "%s", err)
return
}
removeHopHeaders(res.Header)
copyHeader(w.Header(), res.Header)
w.WriteHeader(res.StatusCode)
copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
copyBufPool.Put(copyBuf)
_ = res.Body.Close()
if err != nil && !netutil.IsTrivialNetworkError(err) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
return
}
}
var copyBufPool bytesutil.ByteBufferPool
func copyHeader(dst, src http.Header) {
for k, vv := range src {
for _, v := range vv {
dst.Add(k, v)
}
}
}
func sanitizeRequestHeaders(r *http.Request) *http.Request {
// This code has been copied from net/http/httputil/reverseproxy.go
req := r.Clone(r.Context())
removeHopHeaders(req.Header)
if clientIP, _, err := net.SplitHostPort(req.RemoteAddr); err == nil {
// If we aren't the first proxy retain prior
// X-Forwarded-For information as a comma+space
// separated list and fold multiple headers into one.
prior := req.Header["X-Forwarded-For"]
if len(prior) > 0 {
clientIP = strings.Join(prior, ", ") + ", " + clientIP
}
req.Header.Set("X-Forwarded-For", clientIP)
}
return req
}
func removeHopHeaders(h http.Header) {
// remove hop-by-hop headers listed in the "Connection" header of h.
// See RFC 7230, section 6.1
for _, f := range h["Connection"] {
for _, sf := range strings.Split(f, ",") {
if sf = textproto.TrimString(sf); sf != "" {
h.Del(sf)
}
}
}
// Remove hop-by-hop headers to the backend. Especially
// important is "Connection" because we want a persistent
// connection, regardless of what the client sent to us.
for _, key := range hopHeaders {
h.Del(key)
}
}
// Hop-by-hop headers. These are removed when sent to the backend.
// As of RFC 7230, hop-by-hop headers are required to appear in the
// Connection header field. These are the headers defined by the
// obsoleted RFC 2616 (section 13.5.1) and are used for backward
// compatibility.
var hopHeaders = []string{
"Connection",
"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
"Keep-Alive",
"Proxy-Authenticate",
"Proxy-Authorization",
"Te", // canonicalized version of "TE"
"Trailer", // not Trailers per URL above; https://www.rfc-editor.org/errata_search.php?eid=4522
"Transfer-Encoding",
"Upgrade",
}
var (
@@ -128,43 +236,41 @@ var (
)
var (
reverseProxy *httputil.ReverseProxy
reverseProxyOnce sync.Once
transport *http.Transport
transportOnce sync.Once
)
func getReverseProxy() *httputil.ReverseProxy {
reverseProxyOnce.Do(initReverseProxy)
return reverseProxy
func transportInit() {
tr := http.DefaultTransport.(*http.Transport).Clone()
tr.ResponseHeaderTimeout = *responseTimeout
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
tr.DisableCompression = true
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
tr.ForceAttemptHTTP2 = false
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
transport = tr
}
// initReverseProxy must be called after flag.Parse(), since it uses command-line flags.
func initReverseProxy() {
reverseProxy = &httputil.ReverseProxy{
Director: func(r *http.Request) {
targetURL := r.Header.Get("vm-target-url")
target, err := url.Parse(targetURL)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing targetURL=%q: %s", targetURL, err)
}
r.URL = target
},
Transport: func() *http.Transport {
tr := http.DefaultTransport.(*http.Transport).Clone()
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
tr.DisableCompression = true
// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
tr.ForceAttemptHTTP2 = false
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
return tr
}(),
FlushInterval: time.Second,
ErrorLog: logger.StdErrorLogger(),
}
var (
concurrencyLimitCh chan struct{}
concurrencyLimitOnce sync.Once
)
func concurrencyLimitInit() {
concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests)
_ = metrics.NewGauge("vmauth_concurrent_requests_capacity", func() float64 {
return float64(*maxConcurrentRequests)
})
_ = metrics.NewGauge("vmauth_concurrent_requests_current", func() float64 {
return float64(len(concurrencyLimitCh))
})
}
var concurrentRequestsLimitReachedTotal = metrics.NewCounter("vmauth_concurrent_requests_limit_reached_total")
func usage() {
const s = `
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.

View File

@@ -52,7 +52,7 @@ func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, []Header, error) {
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1554
u.Path = ""
}
for _, e := range ui.URLMap {
for _, e := range ui.URLMaps {
for _, sp := range e.SrcPaths {
if sp.match(u.Path) {
return e.URLPrefix.mergeURLs(&u), e.Headers, nil

View File

@@ -54,7 +54,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
// Complex routing with `url_map`
ui := &UserInfo{
URLMap: []URLMap{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
@@ -86,7 +86,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
// Complex routing regexp paths in `url_map`
ui = &UserInfo{
URLMap: []URLMap{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/api/v1/query(_range)?", "/api/v1/label/[^/]+/values"}),
URLPrefix: mustParseURL("http://vmselect/0/prometheus"),
@@ -132,7 +132,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
}
f(&UserInfo{}, "/foo/bar")
f(&UserInfo{
URLMap: []URLMap{
URLMaps: []URLMap{
{
SrcPaths: getSrcPaths([]string{"/api/v1/query"}),
URLPrefix: mustParseURL("http://foobar/baz"),

View File

@@ -21,7 +21,7 @@ creation of hourly, daily, weekly and monthly backups.
* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `azblob://<bucket>/<path/to/backup>`
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `azblob://<container>/<path/to/backup>`
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
@@ -187,7 +187,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-dst string
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://bucket/path/to/backup or fs:///path/to/local/backup/dir
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
-enableTCP6
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
@@ -225,6 +225,8 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -235,10 +237,10 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxBytesPerSecond size
The maximum upload speed. There is no limit if it is set to 0
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
@@ -286,7 +288,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmbackup` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmbackup` binary and puts it into the `bin` folder.

View File

@@ -3,6 +3,7 @@ package main
import (
"flag"
"fmt"
"net/url"
"os"
"path/filepath"
"strings"
@@ -30,7 +31,7 @@ var (
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. "+
"All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete")
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
"Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://bucket/path/to/backup or fs:///path/to/local/backup/dir\n"+
"Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir\n"+
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce backup duration")
@@ -41,25 +42,36 @@ func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
flag.CommandLine.SetOutput(os.Stdout)
flag.Usage = usage
flagutil.RegisterSecretFlag("snapshot.createURL")
flagutil.RegisterSecretFlag("snapshot.deleteURL")
envflag.Parse()
buildinfo.Init()
logger.Init()
pushmetrics.Init()
if len(*snapshotCreateURL) > 0 {
// create net/url object
createUrl, err := url.Parse(*snapshotCreateURL)
if err != nil {
logger.Fatalf("cannot parse snapshotCreateURL: %s", err)
}
if len(*snapshotName) > 0 {
logger.Fatalf("-snapshotName shouldn't be set if -snapshot.createURL is set, since snapshots are created automatically in this case")
}
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
logger.Infof("Snapshot create url %s", createUrl.Redacted())
if len(*snapshotDeleteURL) <= 0 {
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
if err != nil {
logger.Fatalf("Failed to set snapshot.deleteURL flag: %v", err)
}
}
logger.Infof("Snapshot delete url %s", *snapshotDeleteURL)
deleteUrl, err := url.Parse(*snapshotDeleteURL)
if err != nil {
logger.Fatalf("cannot parse snapshotDeleteURL: %s", err)
}
logger.Infof("Snapshot delete url %s", deleteUrl.Redacted())
name, err := snapshot.Create(*snapshotCreateURL)
name, err := snapshot.Create(createUrl.String())
if err != nil {
logger.Fatalf("cannot create snapshot: %s", err)
}
@@ -69,7 +81,7 @@ func main() {
}
defer func() {
err := snapshot.Delete(*snapshotDeleteURL, name)
err := snapshot.Delete(deleteUrl.String(), name)
if err != nil {
logger.Fatalf("cannot delete snapshot: %s", err)
}
@@ -81,7 +93,7 @@ func main() {
logger.Fatalf("invalid -snapshotName=%q: %s", *snapshotName, err)
}
go httpserver.Serve(*httpListenAddr, nil)
go httpserver.Serve(*httpListenAddr, false, nil)
srcFS, err := newSrcFS()
if err != nil {
@@ -118,7 +130,7 @@ func main() {
func usage() {
const s = `
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3, azblob
or local filesystem. Backed up data can be restored with vmrestore.
See the docs at https://docs.victoriametrics.com/vmbackup.html .
@@ -145,7 +157,7 @@ func newSrcFS() (*fslocal.FS, error) {
fs := &fslocal.FS{
Dir: snapshotPath,
MaxBytesPerSecond: maxBytesPerSecond.N,
MaxBytesPerSecond: maxBytesPerSecond.IntN(),
}
if err := fs.Init(); err != nil {
return nil, fmt.Errorf("cannot initialize fs: %w", err)

View File

@@ -67,7 +67,7 @@ credentials.json
"project_id": "<project>",
"private_key_id": "",
"private_key": "-----BEGIN PRIVATE KEY-----\-----END PRIVATE KEY-----\n",
"client_email": test@<project>.iam.gserviceaccount.com",
"client_email": "test@<project>.iam.gserviceaccount.com",
"client_id": "",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
@@ -158,7 +158,7 @@ The result on the GCS bucket. We see only 3 daily backups:
* GET `/api/v1/backups` - returns list of backups in remote storage.
Example output:
```json
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
@@ -211,7 +211,7 @@ It can be changed by using flag:
`vmbackupmanager backup list` lists backups in remote storage:
```console
$ ./vmbackupmanager backup list
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
### Restore commands
@@ -270,7 +270,15 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
### How to restore in Kubernetes
1. Enter container running `vmbackupmanager`
1. Ensure there is an init container with `vmbackupmanager restore` in `vmstorage` or `vmsingle` pod.
For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) deployments it is required to add:
```yaml
vmbackup:
restore:
onStart: "true"
```
See operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api.html#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api.html#vmsinglespec).
2. Enter container running `vmbackupmanager`
2. Use `vmbackupmanager backup list` to get list of available backups:
```console
$ /vmbackupmanager-prod backup list
@@ -287,6 +295,43 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
```
4. Restart pod
#### Restore cluster into another cluster
These steps are assuming that [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) is used to manage `VMCluster`.
Clusters here are referred to as `source` and `destination`.
1. Create a new cluster with access to *source* cluster `vmbackupmanager` storage and same number of storage nodes.
Add the following section in order to enable restore on start (operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api.html#vmstorage):
```yaml
vmbackup:
restore:
onStart: "true"
```
Note: it is safe to leave this section in the cluster configuration, since it will be ignored if restore mark doesn't exist.
> Important! Use different `-dst` for *destination* cluster to avoid overwriting backup data of the *source* cluster.
2. Enter container running `vmbackupmanager` in *source* cluster
2. Use `vmbackupmanager backup list` to get list of available backups:
```console
$ /vmbackupmanager-prod backup list
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
```
3. Use `vmbackupmanager restore create` to create restore mark at each pod of the *destination* cluster.
Each pod in *destination* cluster should be restored from backup of respective pod in *source* cluster.
For example: `vmstorage-source-0` in *source* cluster should be restored from `vmstorage-destination-0` in *destination* cluster.
```console
$ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/daily/2022-10-06
```
## Monitoring
`vmbackupmanager` exports various metrics in Prometheus exposition format at `http://vmbackupmanager:8300/metrics` page. It is recommended setting up regular scraping of this page
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics could be analyzed later.
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/17798-victoriametrics-backupmanager/) for `vmbackupmanager` overview.
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
If you have suggestions for improvements or have found a bug - please open an issue on github or add
a review to the dashboard.
## Configuration
### Flags
@@ -372,6 +417,8 @@ command-line flags:
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -384,7 +431,7 @@ command-line flags:
The maximum upload speed. There is no limit if it is set to 0
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string

View File

@@ -7,9 +7,12 @@ vmctl provides various useful actions with VictoriaMetrics components.
Features:
- migrate data from [Prometheus](#migrating-data-from-prometheus) to VictoriaMetrics using snapshot API
- migrate data from [Thanos](#migrating-data-from-thanos) to VictoriaMetrics
- migrate data from [Cortex](#migrating-data-from-cortex) to VictoriaMetrics
- migrate data from [Mimir](#migrating-data-from-mimir) to VictoriaMetrics
- migrate data from [InfluxDB](#migrating-data-from-influxdb-1x) to VictoriaMetrics
- migrate data from [OpenTSDB](#migrating-data-from-opentsdb) to VictoriaMetrics
- migrate data between [VictoriaMetrics](#migrating-data-from-victoriametrics) single or cluster version.
- migrate data by [Prometheus remote read protocol](#migrating-data-by-remote-read-protocol) to VictoriaMetrics
- [verify](#verifying-exported-blocks-from-victoriametrics) exported blocks from VictoriaMetrics single or cluster version.
To see the full list of supported modes
@@ -28,6 +31,7 @@ COMMANDS:
influx Migrate timeseries from InfluxDB
prometheus Migrate timeseries from Prometheus
vm-native Migrate time series between VictoriaMetrics installations via native binary format
remote-read Migrate timeseries by Prometheus remote read protocol
verify-block Verifies correctness of data blocks exported via VictoriaMetrics Native format. See https://docs.victoriametrics.com/#how-to-export-data-in-native-format
```
@@ -432,6 +436,64 @@ Found 2 blocks to import. Continue? [Y/n] y
2020/02/23 15:51:07 Total time: 7.153158218s
```
## Migrating data by remote read protocol
`vmctl` supports the `remote-read` mode for migrating data from databases which support
[Prometheus remote read API](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/)
See `./vmctl remote-read --help` for details and full list of flags.
To start the migration process configure the following flags:
1. `--remote-read-src-addr` - data source address to read from;
2. `--vm-addr` - VictoriaMetrics address to write to. For single-node VM is usually equal to `--httpListenAddr`,
and for cluster version is equal to `--httpListenAddr` flag of vminsert component (for example `http://<vminsert>:8480/insert/<accountID>/prometheus`);
3. `--remote-read-filter-time-start` - the time filter in RFC3339 format to select time series with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z';
4. `--remote-read-filter-time-end` - the time filter in RFC3339 format to select time series with timestamp equal or smaller than provided value. E.g. '2020-01-01T20:07:00Z'. Current time is used when omitted.;
5. `--remote-read-step-interval` - split export data into chunks. Valid values are `month, day, hour, minute`;
The importing process example for local installation of Prometheus
and single-node VictoriaMetrics(`http://localhost:8428`):
```
./vmctl remote-read \
--remote-read-src-addr=http://127.0.0.1:9091 \
--remote-read-filter-time-start=2021-10-18T00:00:00Z \
--remote-read-step-interval=hour \
--vm-addr=http://127.0.0.1:8428 \
--vm-concurrency=6
Split defined times into 8798 ranges to import. Continue? [Y/n]
VM worker 0:↘ 127177 samples/s
VM worker 1:↘ 140137 samples/s
VM worker 2:↘ 151606 samples/s
VM worker 3:↘ 130765 samples/s
VM worker 4:↘ 131904 samples/s
VM worker 5:↘ 132693 samples/s
Processing ranges: 8798 / 8798 [█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 100.00%
2022/10/19 16:45:37 Import finished!
2022/10/19 16:45:37 VictoriaMetrics importer stats:
idle duration: 6m57.793987511s;
time spent while importing: 1m18.463744801s;
total samples: 25348208;
samples/s: 323056.31;
total bytes: 669.7 MB;
bytes/s: 8.5 MB;
import requests: 127;
import requests retries: 0;
2022/10/19 16:45:37 Total time: 1m19.406283424s
```
### Filtering
The filtering consists of two parts: by labels and time.
Filtering by time can be configured via flags `--remote-read-filter-time-start` and `--remote-read-filter-time-end`
in RFC3339 format.
Filtering by labels can be configured via flags `--remote-read-filter-label` and `--remote-read-filter-label-value`.
For example, `--remote-read-filter-label=tenant` and `--remote-read-filter-label-value="team-eu"` will select only series
with `tenant="team-eu"` label-value pair.
## Migrating data from Thanos
Thanos uses the same storage engine as Prometheus and the data layout on-disk should be the same. That means
@@ -478,6 +540,187 @@ then import it into VM using `vmctl` in `prometheus` mode.
vmctl prometheus --prom-snapshot thanos-data --vm-addr http://victoria-metrics:8428
```
### Remote read protocol
Currently, Thanos doesn't support streaming remote read protocol. It is [recommended](https://thanos.io/tip/thanos/integrations.md/#storeapi-as-prometheus-remote-read)
to use [thanos-remote-read](https://github.com/G-Research/thanos-remote-read) a proxy, that allows exposing any Thanos
service (or anything that exposes gRPC StoreAPI e.g. Querier) via Prometheus remote read protocol.
If you want to migrate data, you should run [thanos-remote-read](https://github.com/G-Research/thanos-remote-read) proxy
and define the Thanos store address `./thanos-remote-read -store 127.0.0.1:19194`.
It is important to know that `store` flag is Thanos Store API gRPC endpoint.
Also, it is important to know that thanos-remote-read proxy doesn't support `STREAMED_XOR_CHUNKS` mode.
When you run thanos-remote-read proxy, it exposes port to serve HTTP on `10080 by default`.
The importing process example for local installation of Thanos
and single-node VictoriaMetrics(`http://localhost:8428`):
```
./vmctl remote-read \
--remote-read-src-addr=http://127.0.0.1:10080 \
--remote-read-filter-time-start=2021-10-18T00:00:00Z \
--remote-read-step-interval=hour \
--vm-addr=http://127.0.0.1:8428 \
--vm-concurrency=6
```
On the [thanos-remote-read](https://github.com/G-Research/thanos-remote-read) proxy side you will see logs like:
```
ts=2022-10-19T15:05:04.193916Z caller=main.go:278 level=info traceID=00000000000000000000000000000000 msg="thanos request" request="min_time:1666180800000 max_time:1666184399999 matchers:<type:RE value:\".*\" > aggregates:RAW "
ts=2022-10-19T15:05:04.468852Z caller=main.go:278 level=info traceID=00000000000000000000000000000000 msg="thanos request" request="min_time:1666184400000 max_time:1666187999999 matchers:<type:RE value:\".*\" > aggregates:RAW "
ts=2022-10-19T15:05:04.553914Z caller=main.go:278 level=info traceID=00000000000000000000000000000000 msg="thanos request" request="min_time:1666188000000 max_time:1666191364863 matchers:<type:RE value:\".*\" > aggregates:RAW "
```
And when process will finish you will see:
```
Split defined times into 8799 ranges to import. Continue? [Y/n]
VM worker 0:↓ 98183 samples/s
VM worker 1:↓ 114640 samples/s
VM worker 2:↓ 131710 samples/s
VM worker 3:↓ 114256 samples/s
VM worker 4:↓ 105671 samples/s
VM worker 5:↓ 124000 samples/s
Processing ranges: 8799 / 8799 [█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 100.00%
2022/10/19 18:05:07 Import finished!
2022/10/19 18:05:07 VictoriaMetrics importer stats:
idle duration: 52m13.987637229s;
time spent while importing: 9m1.728983776s;
total samples: 70836111;
samples/s: 130759.32;
total bytes: 2.2 GB;
bytes/s: 4.0 MB;
import requests: 356;
import requests retries: 0;
2022/10/19 18:05:07 Total time: 9m2.607521618s
```
## Migrating data from Cortex
Cortex has an implementation of the Prometheus remote read protocol. That means
`vmctl` in mode `remote-read` may also be used for Cortex historical data migration.
These instructions may vary based on the details of your Cortex configuration.
Please read carefully and verify as you go.
### Remote read protocol
If you want to migrate data, you should check your cortex configuration in the section
```yaml
api:
prometheus_http_prefix:
```
If you defined some prometheus prefix, you should use it when you define flag `--remote-read-src-addr=http://127.0.0.1:9009/{prometheus_http_prefix}`.
By default, Cortex uses the `prometheus` path prefix, so you should define the flag `--remote-read-src-addr=http://127.0.0.1:9009/prometheus`.
It is important to know that Cortex doesn't support the `STREAMED_XOR_CHUNKS` mode.
When you run Cortex, it exposes a port to serve HTTP on `9009 by default`.
The importing process example for the local installation of Cortex
and single-node VictoriaMetrics(`http://localhost:8428`):
```
./vmctl remote-read \
--remote-read-src-addr=http://127.0.0.1:9009/prometheus \
--remote-read-filter-time-start=2021-10-18T00:00:00Z \
--remote-read-step-interval=hour \
--remote-read-src-check-alive=false \
--vm-addr=http://127.0.0.1:8428 \
--vm-concurrency=6
```
And when the process finishes, you will see the following:
```
Split defined times into 8842 ranges to import. Continue? [Y/n]
VM worker 0:↗ 3863 samples/s
VM worker 1:↗ 2686 samples/s
VM worker 2:↗ 2620 samples/s
VM worker 3:↗ 2705 samples/s
VM worker 4:↗ 2643 samples/s
VM worker 5:↗ 2593 samples/s
Processing ranges: 8842 / 8842 [█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 100.00%
2022/10/21 12:09:49 Import finished!
2022/10/21 12:09:49 VictoriaMetrics importer stats:
idle duration: 0s;
time spent while importing: 3.82640757s;
total samples: 160232;
samples/s: 41875.31;
total bytes: 11.3 MB;
bytes/s: 3.0 MB;
import requests: 6;
import requests retries: 0;
2022/10/21 12:09:49 Total time: 4.71824253s
```
It is important to know that if you run your Cortex installation in multi-tenant mode, remote read protocol
requires an Authentication header like `X-Scope-OrgID`. You can define it via the flag `--remote-read-headers=X-Scope-OrgID:demo`
## Migrating data from Mimir
Mimir has similar implemintation as Cortex and also support of the Prometheus remote read protocol. That means
`vmctl` in mode `remote-read` may also be used for Mimir historical data migration.
These instructions may vary based on the details of your Mimir configuration.
Please read carefully and verify as you go.
### Remote read protocol
If you want to migrate data, you should check your Mimir configuration in the section
```yaml
api:
prometheus_http_prefix:
```
If you defined some prometheus prefix, you should use it when you define flag `--remote-read-src-addr=http://127.0.0.1:9009/{prometheus_http_prefix}`.
By default, Mimir uses the `prometheus` path prefix, so you should define the flag `--remote-read-src-addr=http://127.0.0.1:9009/prometheus`.
Mimir supports both remote read mode, so you can use `STREAMED_XOR_CHUNKS` mode and `SAMPLES` mode.
When you run Mimir, it exposes a port to serve HTTP on `8080 by default`.
Next example of the local installation was in multi-tenant mode (3 instances of mimir) with nginx as load balancer.
Load balancer expose single port `:9090`.
As you can see in the example we call `:9009` instead of `:8080` because of proxy.
The importing process example for the local installation of Mimir
and single-node VictoriaMetrics(`http://localhost:8428`):
```
./vmctl remote-read
--remote-read-src-addr=http://127.0.0.1:9009/prometheus \
--remote-read-filter-time-start=2021-10-18T00:00:00Z \
--remote-read-step-interval=hour \
--remote-read-src-check-alive=false \
--remote-read-headers=X-Scope-OrgID:demo \
--remote-read-use-stream=true \
--vm-addr=http://127.0.0.1:8428 \
--vm-concurrency=6
```
And when the process finishes, you will see the following:
```
Split defined times into 8847 ranges to import. Continue? [Y/n]
VM worker 0:→ 12176 samples/s
VM worker 1:→ 11918 samples/s
VM worker 2:→ 11261 samples/s
VM worker 3:→ 12861 samples/s
VM worker 4:→ 11096 samples/s
VM worker 5:→ 11575 samples/s
Processing ranges: 8847 / 8847 [█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████] 100.00%
2022/10/21 17:22:23 Import finished!
2022/10/21 17:22:23 VictoriaMetrics importer stats:
idle duration: 0s;
time spent while importing: 15.379614356s;
total samples: 81243;
samples/s: 5282.51;
total bytes: 6.1 MB;
bytes/s: 397.8 kB;
import requests: 6;
import requests retries: 0;
2022/10/21 17:22:23 Total time: 16.287405248s
```
It is important to know that if you run your Mimir installation in multi-tenant mode, remote read protocol
requires an Authentication header like `X-Scope-OrgID`. You can define it via the flag `--remote-read-headers=X-Scope-OrgID:demo`
## Migrating data from VictoriaMetrics
### Native protocol
@@ -590,6 +833,80 @@ Total: 16 B ↗ Speed: 186.32 KiB p/s
2022/08/30 19:48:24 Total time: 12.680582ms
```
#### Cluster-to-cluster migration mode
Using cluster-to-cluster migration mode helps to migrate all tenants data in a single `vmctl` run.
Cluster-to-cluster uses `/admin/tenants` endpoint (available starting from [v1.84.0](https://docs.victoriametrics.com/CHANGELOG.html#v1840)) to discover list of tenants from source cluster.
To use this mode you need to set `--vm-intercluster` flag to `true`, `--vm-native-src-addr` flag to 'http://vmselect:8481/' and `--vm-native-dst-addr` value to http://vminsert:8480/:
```console
./bin/vmctl vm-native --vm-intercluster=true --vm-native-src-addr=http://localhost:8481/ --vm-native-dst-addr=http://172.17.0.3:8480/
VictoriaMetrics Native import mode
2022/12/05 21:20:06 Discovered tenants: [123:1 12812919:1 1289198:1 1289:1283 12:1 1:0 1:1 1:1231231 1:1271727 1:12819 1:281 812891298:1]
2022/12/05 21:20:06 Initing export pipe from "http://localhost:8481/select/123:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/123:1/prometheus/api/v1/import/native":
Total: 61.13 MiB ↖ Speed: 2.05 MiB p/s
Total: 61.13 MiB ↗ Speed: 2.30 MiB p/s
2022/12/05 21:20:33 Initing export pipe from "http://localhost:8481/select/12812919:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/12812919:1/prometheus/api/v1/import/native":
Total: 43.14 MiB ↘ Speed: 1.86 MiB p/s
Total: 43.14 MiB ↙ Speed: 2.36 MiB p/s
2022/12/05 21:20:51 Initing export pipe from "http://localhost:8481/select/1289198:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1289198:1/prometheus/api/v1/import/native":
Total: 16.64 MiB ↗ Speed: 2.66 MiB p/s
Total: 16.64 MiB ↘ Speed: 2.19 MiB p/s
2022/12/05 21:20:59 Initing export pipe from "http://localhost:8481/select/1289:1283/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1289:1283/prometheus/api/v1/import/native":
Total: 43.33 MiB ↙ Speed: 1.94 MiB p/s
Total: 43.33 MiB ↖ Speed: 2.35 MiB p/s
2022/12/05 21:21:18 Initing export pipe from "http://localhost:8481/select/12:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/12:1/prometheus/api/v1/import/native":
Total: 63.78 MiB ↙ Speed: 1.96 MiB p/s
Total: 63.78 MiB ↖ Speed: 2.28 MiB p/s
2022/12/05 21:21:46 Initing export pipe from "http://localhost:8481/select/1:0/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:0/prometheus/api/v1/import/native":
2022/12/05 21:21:46 Import finished!
Total: 330 B ↗ Speed: 3.53 MiB p/s
2022/12/05 21:21:46 Initing export pipe from "http://localhost:8481/select/1:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:1/prometheus/api/v1/import/native":
Total: 63.81 MiB ↙ Speed: 1.96 MiB p/s
Total: 63.81 MiB ↖ Speed: 2.28 MiB p/s
2022/12/05 21:22:14 Initing export pipe from "http://localhost:8481/select/1:1231231/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:1231231/prometheus/api/v1/import/native":
Total: 63.84 MiB ↙ Speed: 1.93 MiB p/s
Total: 63.84 MiB ↖ Speed: 2.29 MiB p/s
2022/12/05 21:22:42 Initing export pipe from "http://localhost:8481/select/1:1271727/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:1271727/prometheus/api/v1/import/native":
Total: 54.37 MiB ↘ Speed: 1.90 MiB p/s
Total: 54.37 MiB ↙ Speed: 2.37 MiB p/s
2022/12/05 21:23:05 Initing export pipe from "http://localhost:8481/select/1:12819/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:12819/prometheus/api/v1/import/native":
Total: 17.01 MiB ↙ Speed: 1.75 MiB p/s
Total: 17.01 MiB ↖ Speed: 2.15 MiB p/s
2022/12/05 21:23:13 Initing export pipe from "http://localhost:8481/select/1:281/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/1:281/prometheus/api/v1/import/native":
Total: 63.89 MiB ↘ Speed: 1.90 MiB p/s
Total: 63.89 MiB ↙ Speed: 2.29 MiB p/s
2022/12/05 21:23:42 Initing export pipe from "http://localhost:8481/select/812891298:1/prometheus/api/v1/export/native" with filters:
filter: match[]={__name__!=""}
Initing import process to "http://172.17.0.3:8480/insert/812891298:1/prometheus/api/v1/import/native":
Total: 63.84 MiB ↖ Speed: 1.99 MiB p/s
Total: 63.84 MiB ↗ Speed: 2.26 MiB p/s
2022/12/05 21:24:10 Total time: 4m4.1466565s
```
## Verifying exported blocks from VictoriaMetrics
@@ -700,7 +1017,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmctl` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl` binary and puts it into the `bin` folder.
@@ -729,7 +1046,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
#### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.3.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
2. Run `make vmctl-linux-arm` or `make vmctl-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmctl-linux-arm` or `vmctl-linux-arm64` binary respectively and puts it into the `bin` folder.

View File

@@ -2,6 +2,7 @@ package main
import (
"fmt"
"time"
"github.com/urfave/cli/v2"
@@ -43,6 +44,8 @@ const (
// also used in vm-native
vmExtraLabel = "vm-extra-label"
vmRateLimit = "vm-rate-limit"
vmInterCluster = "vm-intercluster"
)
var (
@@ -350,7 +353,7 @@ var (
},
&cli.StringFlag{
Name: vmNativeStepInterval,
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s'.", vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour),
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s','%s'.", vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
},
&cli.StringFlag{
Name: vmNativeSrcAddr,
@@ -397,6 +400,105 @@ var (
Usage: "Optional data transfer rate limit in bytes per second.\n" +
"By default the rate limit is disabled. It can be useful for limiting load on source or destination databases.",
},
&cli.BoolFlag{
Name: vmInterCluster,
Usage: "Enables cluster-to-cluster migration mode with automatic tenants data migration.\n" +
fmt.Sprintf(" In this mode --%s flag format is: 'http://vmselect:8481/'. --%s flag format is: http://vminsert:8480/. \n", vmNativeSrcAddr, vmNativeDstAddr) +
" TenantID will be appended automatically after discovering tenants from src.",
},
}
)
const (
remoteRead = "remote-read"
remoteReadUseStream = "remote-read-use-stream"
remoteReadConcurrency = "remote-read-concurrency"
remoteReadFilterTimeStart = "remote-read-filter-time-start"
remoteReadFilterTimeEnd = "remote-read-filter-time-end"
remoteReadFilterLabel = "remote-read-filter-label"
remoteReadFilterLabelValue = "remote-read-filter-label-value"
remoteReadStepInterval = "remote-read-step-interval"
remoteReadSrcAddr = "remote-read-src-addr"
remoteReadUser = "remote-read-user"
remoteReadPassword = "remote-read-password"
remoteReadHTTPTimeout = "remote-read-http-timeout"
remoteReadHeaders = "remote-read-headers"
remoteReadInsecureSkipVerify = "remote-read-insecure-skip-verify"
)
var (
remoteReadFlags = []cli.Flag{
&cli.IntFlag{
Name: remoteReadConcurrency,
Usage: "Number of concurrently running remote read readers",
Value: 1,
},
&cli.TimestampFlag{
Name: remoteReadFilterTimeStart,
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
Layout: time.RFC3339,
},
&cli.TimestampFlag{
Name: remoteReadFilterTimeEnd,
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
Layout: time.RFC3339,
},
&cli.StringFlag{
Name: remoteReadFilterLabel,
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
Value: "__name__",
},
&cli.StringFlag{
Name: remoteReadFilterLabelValue,
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
Value: ".*",
},
&cli.BoolFlag{
Name: remoteRead,
Usage: "Use Prometheus remote read protocol",
Value: false,
},
&cli.BoolFlag{
Name: remoteReadUseStream,
Usage: "Defines whether to use SAMPLES or STREAMED_XOR_CHUNKS mode. By default is uses SAMPLES mode. See https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/#streamed-chunks",
Value: false,
},
&cli.StringFlag{
Name: remoteReadStepInterval,
Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are %q,%q,%q,%q.", remoteReadFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
Required: true,
},
&cli.StringFlag{
Name: remoteReadSrcAddr,
Usage: "Remote read address to perform read from.",
Required: true,
},
&cli.StringFlag{
Name: remoteReadUser,
Usage: "Remote read username for basic auth",
EnvVars: []string{"REMOTE_READ_USERNAME"},
},
&cli.StringFlag{
Name: remoteReadPassword,
Usage: "Remote read password for basic auth",
EnvVars: []string{"REMOTE_READ_PASSWORD"},
},
&cli.DurationFlag{
Name: remoteReadHTTPTimeout,
Usage: "Timeout defines timeout for HTTP write request to remote storage",
},
&cli.StringFlag{
Name: remoteReadHeaders,
Value: "",
Usage: "Optional HTTP headers to send with each request to the corresponding remote source storage \n" +
"For example, --remote-read-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding remote source storage. \n" +
"Multiple headers must be delimited by '^^': --remote-read-headers='header1:value1^^header2:value2'",
},
&cli.BoolFlag{
Name: remoteReadInsecureSkipVerify,
Usage: "Whether to skip TLS certificate verification when connecting to the remote read address",
Value: false,
},
}
)

View File

@@ -11,6 +11,7 @@ import (
"syscall"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/urfave/cli/v2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
@@ -40,7 +41,7 @@ func main() {
Commands: []*cli.Command{
{
Name: "opentsdb",
Usage: "Migrate timeseries from OpenTSDB",
Usage: "Migrate time series from OpenTSDB",
Flags: mergeFlags(globalFlags, otsdbFlags, vmFlags),
Action: func(c *cli.Context) error {
fmt.Println("OpenTSDB import mode")
@@ -75,7 +76,7 @@ func main() {
},
{
Name: "influx",
Usage: "Migrate timeseries from InfluxDB",
Usage: "Migrate time series from InfluxDB",
Flags: mergeFlags(globalFlags, influxFlags, vmFlags),
Action: func(c *cli.Context) error {
fmt.Println("InfluxDB import mode")
@@ -114,9 +115,49 @@ func main() {
return processor.run(c.Bool(globalSilent), c.Bool(globalVerbose))
},
},
{
Name: "remote-read",
Usage: "Migrate time series via Prometheus remote-read protocol",
Flags: mergeFlags(globalFlags, remoteReadFlags, vmFlags),
Action: func(c *cli.Context) error {
rr, err := remoteread.NewClient(remoteread.Config{
Addr: c.String(remoteReadSrcAddr),
Username: c.String(remoteReadUser),
Password: c.String(remoteReadPassword),
Timeout: c.Duration(remoteReadHTTPTimeout),
UseStream: c.Bool(remoteReadUseStream),
Headers: c.String(remoteReadHeaders),
LabelName: c.String(remoteReadFilterLabel),
LabelValue: c.String(remoteReadFilterLabelValue),
InsecureSkipVerify: c.Bool(remoteReadInsecureSkipVerify),
})
if err != nil {
return fmt.Errorf("error create remote read client: %s", err)
}
vmCfg := initConfigVM(c)
importer, err := vm.NewImporter(vmCfg)
if err != nil {
return fmt.Errorf("failed to create VM importer: %s", err)
}
rmp := remoteReadProcessor{
src: rr,
dst: importer,
filter: remoteReadFilter{
timeStart: c.Timestamp(remoteReadFilterTimeStart),
timeEnd: c.Timestamp(remoteReadFilterTimeEnd),
chunk: c.String(remoteReadStepInterval),
},
cc: c.Int(remoteReadConcurrency),
}
return rmp.run(ctx, c.Bool(globalSilent), c.Bool(globalVerbose))
},
},
{
Name: "prometheus",
Usage: "Migrate timeseries from Prometheus",
Usage: "Migrate time series from Prometheus",
Flags: mergeFlags(globalFlags, promFlags, vmFlags),
Action: func(c *cli.Context) error {
fmt.Println("Prometheus import mode")
@@ -160,7 +201,8 @@ func main() {
}
p := vmNativeProcessor{
rateLimit: c.Int64(vmRateLimit),
rateLimit: c.Int64(vmRateLimit),
interCluster: c.Bool(vmInterCluster),
filter: filter{
match: c.String(vmNativeFilterMatch),
timeStart: c.String(vmNativeFilterTimeStart),

View File

@@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
type prometheusProcessor struct {
@@ -101,6 +102,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
if err != nil {
return fmt.Errorf("failed to read block: %s", err)
}
var it chunkenc.Iterator
for ss.Next() {
var name string
var labels []vm.LabelPair
@@ -122,8 +124,16 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
var timestamps []int64
var values []float64
it := series.Iterator()
for it.Next() {
it = series.Iterator(it)
for {
typ := it.Next()
if typ == chunkenc.ValNone {
break
}
if typ != chunkenc.ValFloat {
// Skip unsupported values
continue
}
t, v := it.At()
timestamps = append(timestamps, t)
values = append(values, v)

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"time"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb"
)

View File

@@ -0,0 +1,319 @@
package main
import (
"context"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/testdata/servers_integration_test"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/prometheus/prometheus/prompb"
)
func TestRemoteRead(t *testing.T) {
var testCases = []struct {
name string
remoteReadConfig remoteread.Config
vmCfg vm.Config
start string
end string
numOfSamples int64
numOfSeries int64
rrp remoteReadProcessor
chunk string
remoteReadSeries func(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries
expectedSeries []vm.TimeSeries
}{
{
name: "step minute on minute time range",
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*"},
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
start: "2022-11-26T11:23:05+02:00",
end: "2022-11-26T11:24:05+02:00",
numOfSamples: 2,
numOfSeries: 3,
chunk: stepper.StepMinute,
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
expectedSeries: []vm.TimeSeries{
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{0, 0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{100, 100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{200, 200},
},
},
},
{
name: "step month on month time range",
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*"},
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
start: "2022-09-26T11:23:05+02:00",
end: "2022-11-26T11:24:05+02:00",
numOfSamples: 2,
numOfSeries: 3,
chunk: stepper.StepMonth,
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
expectedSeries: []vm.TimeSeries{
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1664184185000},
Values: []float64{0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1664184185000},
Values: []float64{100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1664184185000},
Values: []float64{200},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1666819415000},
Values: []float64{0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1666819415000},
Values: []float64{100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1666819415000},
Values: []float64{200}},
},
},
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
remoteReadServer := remote_read_integration.NewRemoteReadServer(t)
defer remoteReadServer.Close()
remoteWriteServer := remote_read_integration.NewRemoteWriteServer(t)
defer remoteWriteServer.Close()
tt.remoteReadConfig.Addr = remoteReadServer.URL()
rr, err := remoteread.NewClient(tt.remoteReadConfig)
if err != nil {
t.Fatalf("error create remote read client: %s", err)
}
start, err := time.Parse(time.RFC3339, tt.start)
if err != nil {
t.Fatalf("Error parse start time: %s", err)
}
end, err := time.Parse(time.RFC3339, tt.end)
if err != nil {
t.Fatalf("Error parse end time: %s", err)
}
rrs := tt.remoteReadSeries(start.Unix(), end.Unix(), tt.numOfSeries, tt.numOfSamples)
remoteReadServer.SetRemoteReadSeries(rrs)
remoteWriteServer.ExpectedSeries(tt.expectedSeries)
tt.vmCfg.Addr = remoteWriteServer.URL()
importer, err := vm.NewImporter(tt.vmCfg)
if err != nil {
t.Fatalf("failed to create VM importer: %s", err)
}
defer importer.Close()
rmp := remoteReadProcessor{
src: rr,
dst: importer,
filter: remoteReadFilter{
timeStart: &start,
timeEnd: &end,
chunk: tt.chunk,
},
cc: 1,
}
ctx := context.Background()
err = rmp.run(ctx, true, false)
if err != nil {
t.Fatalf("failed to run remote read processor: %s", err)
}
})
}
}
func TestSteamRemoteRead(t *testing.T) {
var testCases = []struct {
name string
remoteReadConfig remoteread.Config
vmCfg vm.Config
start string
end string
numOfSamples int64
numOfSeries int64
rrp remoteReadProcessor
chunk string
remoteReadSeries func(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries
expectedSeries []vm.TimeSeries
}{
{
name: "step minute on minute time range",
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*", UseStream: true},
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
start: "2022-11-26T11:23:05+02:00",
end: "2022-11-26T11:24:05+02:00",
numOfSamples: 2,
numOfSeries: 3,
chunk: stepper.StepMinute,
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
expectedSeries: []vm.TimeSeries{
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{0, 0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{100, 100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1669454585000, 1669454615000},
Values: []float64{200, 200},
},
},
},
{
name: "step month on month time range",
remoteReadConfig: remoteread.Config{Addr: "", LabelName: "__name__", LabelValue: ".*", UseStream: true},
vmCfg: vm.Config{Addr: "", Concurrency: 1, DisableProgressBar: true},
start: "2022-09-26T11:23:05+02:00",
end: "2022-11-26T11:24:05+02:00",
numOfSamples: 2,
numOfSeries: 3,
chunk: stepper.StepMonth,
remoteReadSeries: remote_read_integration.GenerateRemoteReadSeries,
expectedSeries: []vm.TimeSeries{
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1664184185000},
Values: []float64{0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1664184185000},
Values: []float64{100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1664184185000},
Values: []float64{200},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "0"}},
Timestamps: []int64{1666819415000},
Values: []float64{0},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "1"}},
Timestamps: []int64{1666819415000},
Values: []float64{100},
},
{
Name: "vm_metric_1",
LabelPairs: []vm.LabelPair{{Name: "job", Value: "2"}},
Timestamps: []int64{1666819415000},
Values: []float64{200}},
},
},
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
remoteReadServer := remote_read_integration.NewRemoteReadStreamServer(t)
defer remoteReadServer.Close()
remoteWriteServer := remote_read_integration.NewRemoteWriteServer(t)
defer remoteWriteServer.Close()
tt.remoteReadConfig.Addr = remoteReadServer.URL()
rr, err := remoteread.NewClient(tt.remoteReadConfig)
if err != nil {
t.Fatalf("error create remote read client: %s", err)
}
start, err := time.Parse(time.RFC3339, tt.start)
if err != nil {
t.Fatalf("Error parse start time: %s", err)
}
end, err := time.Parse(time.RFC3339, tt.end)
if err != nil {
t.Fatalf("Error parse end time: %s", err)
}
rrs := tt.remoteReadSeries(start.Unix(), end.Unix(), tt.numOfSeries, tt.numOfSamples)
remoteReadServer.InitMockStorage(rrs)
remoteWriteServer.ExpectedSeries(tt.expectedSeries)
tt.vmCfg.Addr = remoteWriteServer.URL()
importer, err := vm.NewImporter(tt.vmCfg)
if err != nil {
t.Fatalf("failed to create VM importer: %s", err)
}
defer importer.Close()
rmp := remoteReadProcessor{
src: rr,
dst: importer,
filter: remoteReadFilter{
timeStart: &start,
timeEnd: &end,
chunk: tt.chunk,
},
cc: 1,
}
ctx := context.Background()
err = rmp.run(ctx, true, false)
if err != nil {
t.Fatalf("failed to run remote read processor: %s", err)
}
})
}
}

127
app/vmctl/remoteread.go Normal file
View File

@@ -0,0 +1,127 @@
package main
import (
"context"
"fmt"
"log"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/cheggaaa/pb/v3"
)
type remoteReadProcessor struct {
filter remoteReadFilter
dst *vm.Importer
src *remoteread.Client
cc int
}
type remoteReadFilter struct {
timeStart *time.Time
timeEnd *time.Time
chunk string
}
func (rrp *remoteReadProcessor) run(ctx context.Context, silent, verbose bool) error {
rrp.dst.ResetStats()
if rrp.filter.timeEnd == nil {
t := time.Now().In(rrp.filter.timeStart.Location())
rrp.filter.timeEnd = &t
}
if rrp.cc < 1 {
rrp.cc = 1
}
ranges, err := stepper.SplitDateRange(*rrp.filter.timeStart, *rrp.filter.timeEnd, rrp.filter.chunk)
if err != nil {
return fmt.Errorf("failed to create date ranges for the given time filters: %v", err)
}
question := fmt.Sprintf("Selected time range %q - %q will be split into %d ranges according to %q step. Continue?",
rrp.filter.timeStart.String(), rrp.filter.timeEnd.String(), len(ranges), rrp.filter.chunk)
if !silent && !prompt(question) {
return nil
}
var bar *pb.ProgressBar
if !silent {
bar = barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing ranges"), len(ranges))
if err := barpool.Start(); err != nil {
return err
}
}
defer func() {
if !silent {
barpool.Stop()
}
log.Println("Import finished!")
log.Print(rrp.dst.Stats())
}()
rangeC := make(chan *remoteread.Filter)
errCh := make(chan error)
var wg sync.WaitGroup
wg.Add(rrp.cc)
for i := 0; i < rrp.cc; i++ {
go func() {
defer wg.Done()
for r := range rangeC {
if err := rrp.do(ctx, r); err != nil {
errCh <- fmt.Errorf("request failed for: %s", err)
return
}
if bar != nil {
bar.Increment()
}
}
}()
}
for _, r := range ranges {
select {
case infErr := <-errCh:
return fmt.Errorf("remote read error: %s", infErr)
case vmErr := <-rrp.dst.Errors():
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, verbose))
case rangeC <- &remoteread.Filter{
StartTimestampMs: r[0].UnixMilli(),
EndTimestampMs: r[1].UnixMilli(),
}:
}
}
close(rangeC)
wg.Wait()
rrp.dst.Close()
close(errCh)
// drain import errors channel
for vmErr := range rrp.dst.Errors() {
if vmErr.Err != nil {
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, verbose))
}
}
for err := range errCh {
return fmt.Errorf("import process failed: %s", err)
}
return nil
}
func (rrp *remoteReadProcessor) do(ctx context.Context, filter *remoteread.Filter) error {
return rrp.src.Read(ctx, filter, func(series *vm.TimeSeries) error {
if err := rrp.dst.Input(series); err != nil {
return fmt.Errorf(
"failed to read data for time range start: %d, end: %d, %s",
filter.StartTimestampMs, filter.EndTimestampMs, err)
}
return nil
})
}

View File

@@ -0,0 +1,352 @@
package remoteread
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
const (
defaultReadTimeout = 30 * time.Second
remoteReadPath = "/api/v1/read"
healthPath = "/-/healthy"
)
// StreamCallback is a callback function for processing time series
type StreamCallback func(series *vm.TimeSeries) error
// Client is an HTTP client for reading
// time series via remote read protocol.
type Client struct {
addr string
c *http.Client
user string
password string
useStream bool
headers []keyValue
matchers []*prompb.LabelMatcher
}
// Config is config for remote read.
type Config struct {
// Addr of remote storage
Addr string
// Timeout defines timeout for HTTP requests
// made by remote read client
Timeout time.Duration
// Username is the remote read username, optional.
Username string
// Password is the remote read password, optional.
Password string
// UseStream defines whether to use SAMPLES or STREAMED_XOR_CHUNKS mode
// see https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/#samples
// https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/#streamed-chunks
UseStream bool
// Headers optional HTTP headers to send with each request to the corresponding remote storage
Headers string
// LabelName, LabelValue stands for label=~value pair used for read requests.
// Is optional.
LabelName, LabelValue string
// TLSSkipVerify defines whether to skip TLS certificate verification when connecting to the remote read address.
InsecureSkipVerify bool
}
// Filter defines a list of filters applied to requested data
type Filter struct {
StartTimestampMs int64
EndTimestampMs int64
}
// NewClient returns client for
// reading time series via remote read protocol.
func NewClient(cfg Config) (*Client, error) {
if cfg.Addr == "" {
return nil, fmt.Errorf("config.Addr can't be empty")
}
if cfg.Timeout == 0 {
cfg.Timeout = defaultReadTimeout
}
var hdrs []string
if cfg.Headers != "" {
hdrs = strings.Split(cfg.Headers, "^^")
}
headers, err := parseHeaders(hdrs)
if err != nil {
return nil, err
}
var m *prompb.LabelMatcher
if cfg.LabelName != "" && cfg.LabelValue != "" {
m = &prompb.LabelMatcher{
Type: prompb.LabelMatcher_RE,
Name: cfg.LabelName,
Value: cfg.LabelValue,
}
}
c := &Client{
c: &http.Client{
Timeout: cfg.Timeout,
Transport: utils.Transport(cfg.Addr, cfg.InsecureSkipVerify),
},
addr: strings.TrimSuffix(cfg.Addr, "/"),
user: cfg.Username,
password: cfg.Password,
useStream: cfg.UseStream,
headers: headers,
matchers: []*prompb.LabelMatcher{m},
}
return c, nil
}
// Read fetch data from remote read source
func (c *Client) Read(ctx context.Context, filter *Filter, streamCb StreamCallback) error {
req := &prompb.ReadRequest{
Queries: []*prompb.Query{
{
StartTimestampMs: filter.StartTimestampMs,
EndTimestampMs: filter.EndTimestampMs - 1,
Matchers: c.matchers,
},
},
}
if c.useStream {
req.AcceptedResponseTypes = []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS}
}
data, err := proto.Marshal(req)
if err != nil {
return fmt.Errorf("unable to marshal read request: %w", err)
}
b := snappy.Encode(nil, data)
if err := c.fetch(ctx, b, streamCb); err != nil {
if errors.Is(err, context.Canceled) {
return fmt.Errorf("fetch request has ben cancelled")
}
return fmt.Errorf("error while fetching data from remote storage: %s", err)
}
return nil
}
func (c *Client) do(req *http.Request) (*http.Response, error) {
if c.user != "" {
req.SetBasicAuth(c.user, c.password)
}
for _, h := range c.headers {
req.Header.Add(h.key, h.value)
}
return c.c.Do(req)
}
// Ping checks the health of the read source
func (c *Client) Ping() error {
url := c.addr + healthPath
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("cannot create request to %q: %s", url, err)
}
resp, err := c.do(req)
if err != nil {
return err
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("bad status code: %d", resp.StatusCode)
}
return nil
}
func (c *Client) fetch(ctx context.Context, data []byte, streamCb StreamCallback) error {
r := bytes.NewReader(data)
url := c.addr + remoteReadPath
req, err := http.NewRequest("POST", url, r)
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
req.Header.Add("Content-Encoding", "snappy")
req.Header.Add("Accept-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf")
if c.useStream {
req.Header.Set("Content-Type", "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse")
}
req.Header.Set("X-Prometheus-Remote-Read-Version", "0.1.0")
resp, err := c.do(req.WithContext(ctx))
if err != nil {
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
req.URL.Redacted(), err, len(data), r.Size())
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)
}
if c.useStream {
return processStreamResponse(resp.Body, streamCb)
}
return processResponse(resp.Body, streamCb)
}
func processResponse(body io.ReadCloser, callback StreamCallback) error {
d, err := io.ReadAll(body)
if err != nil {
return fmt.Errorf("error reading response: %w", err)
}
uncompressed, err := snappy.Decode(nil, d)
if err != nil {
return fmt.Errorf("error decoding response: %w", err)
}
var readResp prompb.ReadResponse
err = proto.Unmarshal(uncompressed, &readResp)
if err != nil {
return fmt.Errorf("unable to unmarshal response body: %w", err)
}
// response could have no results for the given filter, but that
// shouldn't be accounted as an error.
for _, res := range readResp.Results {
for _, ts := range res.Timeseries {
vmTs := convertSamples(ts.Samples, ts.Labels)
if err := callback(vmTs); err != nil {
return err
}
}
}
return nil
}
var bbPool bytesutil.ByteBufferPool
func processStreamResponse(body io.ReadCloser, callback StreamCallback) error {
bb := bbPool.Get()
defer func() { bbPool.Put(bb) }()
stream := remote.NewChunkedReader(body, remote.DefaultChunkedReadLimit, bb.B)
for {
res := &prompb.ChunkedReadResponse{}
err := stream.NextProto(res)
if err == io.EOF {
break
}
if err != nil {
return err
}
for _, series := range res.ChunkedSeries {
samples := make([]prompb.Sample, 0)
for _, chunk := range series.Chunks {
s, err := parseSamples(chunk.Data)
if err != nil {
return err
}
samples = append(samples, s...)
}
ts := convertSamples(samples, series.Labels)
if err := callback(ts); err != nil {
return err
}
}
}
return nil
}
func parseSamples(chunk []byte) ([]prompb.Sample, error) {
c, err := chunkenc.FromData(chunkenc.EncXOR, chunk)
if err != nil {
return nil, fmt.Errorf("error read chunk: %w", err)
}
var samples []prompb.Sample
it := c.Iterator(nil)
for {
typ := it.Next()
if typ == chunkenc.ValNone {
break
}
if typ != chunkenc.ValFloat {
// Skip unsupported values
continue
}
if it.Err() != nil {
return nil, fmt.Errorf("error iterate over chunks: %w", it.Err())
}
ts, v := it.At()
s := prompb.Sample{
Timestamp: ts,
Value: v,
}
samples = append(samples, s)
}
return samples, it.Err()
}
type keyValue struct {
key string
value string
}
func parseHeaders(headers []string) ([]keyValue, error) {
if len(headers) == 0 {
return nil, nil
}
kvs := make([]keyValue, len(headers))
for i, h := range headers {
n := strings.IndexByte(h, ':')
if n < 0 {
return nil, fmt.Errorf(`missing ':' in header %q; expecting "key: value" format`, h)
}
kv := &kvs[i]
kv.key = strings.TrimSpace(h[:n])
kv.value = strings.TrimSpace(h[n+1:])
}
return kvs, nil
}
func convertSamples(samples []prompb.Sample, labels []prompb.Label) *vm.TimeSeries {
labelPairs := make([]vm.LabelPair, 0, len(labels))
nameValue := ""
for _, label := range labels {
if label.Name == "__name__" {
nameValue = label.Value
continue
}
labelPairs = append(labelPairs, vm.LabelPair{Name: label.Name, Value: label.Value})
}
n := len(samples)
values := make([]float64, 0, n)
timestamps := make([]int64, 0, n)
for _, sample := range samples {
values = append(values, sample.Value)
timestamps = append(timestamps, sample.Timestamp)
}
return &vm.TimeSeries{
Name: nameValue,
LabelPairs: labelPairs,
Timestamps: timestamps,
Values: values,
}
}

View File

@@ -12,6 +12,8 @@ const (
StepDay string = "day"
// StepHour represents a one hour interval
StepHour string = "hour"
// StepMinute represents a one minute interval
StepMinute string = "minute"
)
// SplitDateRange splits start-end range in a subset of ranges respecting the given step
@@ -42,8 +44,13 @@ func SplitDateRange(start, end time.Time, step string) ([][]time.Time, error) {
nextStep = func(t time.Time) (time.Time, time.Time) {
return t, t.Add(time.Hour * 1)
}
case StepMinute:
nextStep = func(t time.Time) (time.Time, time.Time) {
return t, t.Add(time.Minute * 1)
}
default:
return nil, fmt.Errorf("failed to parse step value, valid values are: '%s', '%s', '%s'. provided: '%s'", StepMonth, StepDay, StepHour, step)
return nil, fmt.Errorf("failed to parse step value, valid values are: '%s', '%s', '%s', '%s'. provided: '%s'",
StepMonth, StepDay, StepHour, StepMinute, step)
}
currentStep := start

View File

@@ -0,0 +1,368 @@
package remote_read_integration
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"testing"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/tsdb/chunks"
)
const (
maxBytesInFrame = 1024 * 1024
)
type RemoteReadServer struct {
server *httptest.Server
series []*prompb.TimeSeries
storage *MockStorage
}
// NewRemoteReadServer creates a remote read server. It exposes a single endpoint and responds with the
// passed series based on the request to the read endpoint. It returns a server which should be closed after
// being used.
func NewRemoteReadServer(t *testing.T) *RemoteReadServer {
rrs := &RemoteReadServer{
series: make([]*prompb.TimeSeries, 0),
}
rrs.server = httptest.NewServer(rrs.getReadHandler(t))
return rrs
}
// Close closes the server.
func (rrs *RemoteReadServer) Close() {
rrs.server.Close()
}
func (rrs *RemoteReadServer) URL() string {
return rrs.server.URL
}
func (rrs *RemoteReadServer) SetRemoteReadSeries(series []*prompb.TimeSeries) {
rrs.series = append(rrs.series, series...)
}
func (rrs *RemoteReadServer) getReadHandler(t *testing.T) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if !validateReadHeaders(t, r) {
t.Fatalf("invalid read headers")
}
compressed, err := io.ReadAll(r.Body)
if err != nil {
t.Fatalf("error read body: %s", err)
}
reqBuf, err := snappy.Decode(nil, compressed)
if err != nil {
t.Fatalf("error decode compressed data:%s", err)
}
var req prompb.ReadRequest
if err := proto.Unmarshal(reqBuf, &req); err != nil {
t.Fatalf("error unmarshal read request: %s", err)
}
resp := &prompb.ReadResponse{
Results: make([]*prompb.QueryResult, len(req.Queries)),
}
for i, r := range req.Queries {
startTs := r.StartTimestampMs
endTs := r.EndTimestampMs
ts := make([]*prompb.TimeSeries, len(rrs.series))
for i, s := range rrs.series {
var samples []prompb.Sample
for _, sample := range s.Samples {
if sample.Timestamp >= startTs && sample.Timestamp < endTs {
samples = append(samples, sample)
}
}
var series prompb.TimeSeries
if len(samples) > 0 {
series.Labels = s.Labels
series.Samples = samples
}
ts[i] = &series
}
resp.Results[i] = &prompb.QueryResult{Timeseries: ts}
data, err := proto.Marshal(resp)
if err != nil {
t.Fatalf("error marshal response: %s", err)
}
compressed = snappy.Encode(nil, data)
w.Header().Set("Content-Type", "application/x-protobuf")
w.Header().Set("Content-Encoding", "snappy")
w.WriteHeader(http.StatusOK)
if _, err := w.Write(compressed); err != nil {
t.Fatalf("snappy encode error: %s", err)
}
}
})
}
func NewRemoteReadStreamServer(t *testing.T) *RemoteReadServer {
rrs := &RemoteReadServer{
series: make([]*prompb.TimeSeries, 0),
}
rrs.server = httptest.NewServer(rrs.getStreamReadHandler(t))
return rrs
}
func (rrs *RemoteReadServer) InitMockStorage(series []*prompb.TimeSeries) {
rrs.storage = NewMockStorage(series)
}
func (rrs *RemoteReadServer) getStreamReadHandler(t *testing.T) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if !validateStreamReadHeaders(t, r) {
t.Fatalf("invalid read headers")
}
f, ok := w.(http.Flusher)
if !ok {
t.Fatalf("internal http.ResponseWriter does not implement http.Flusher interface")
}
stream := remote.NewChunkedWriter(w, f)
data, err := io.ReadAll(r.Body)
if err != nil {
t.Fatalf("error read body: %s", err)
}
decodedData, err := snappy.Decode(nil, data)
if err != nil {
t.Fatalf("error decode compressed data:%s", err)
}
var req prompb.ReadRequest
if err := proto.Unmarshal(decodedData, &req); err != nil {
t.Fatalf("error unmarshal read request: %s", err)
}
var chks []prompb.Chunk
ctx := context.Background()
for idx, r := range req.Queries {
startTs := r.StartTimestampMs
endTs := r.EndTimestampMs
var matchers []*labels.Matcher
cb := func() (int64, error) { return 0, nil }
c := remote.NewSampleAndChunkQueryableClient(rrs.storage, nil, matchers, true, cb)
q, err := c.ChunkQuerier(ctx, startTs, endTs)
if err != nil {
t.Fatalf("error init chunk querier: %s", err)
}
ss := q.Select(false, nil, matchers...)
var iter chunks.Iterator
for ss.Next() {
series := ss.At()
iter = series.Iterator(iter)
labels := remote.MergeLabels(labelsToLabelsProto(series.Labels()), nil)
frameBytesLeft := maxBytesInFrame
for _, lb := range labels {
frameBytesLeft -= lb.Size()
}
isNext := iter.Next()
for isNext {
chunk := iter.At()
if chunk.Chunk == nil {
t.Fatalf("error found not populated chunk returned by SeriesSet at ref: %v", chunk.Ref)
}
chks = append(chks, prompb.Chunk{
MinTimeMs: chunk.MinTime,
MaxTimeMs: chunk.MaxTime,
Type: prompb.Chunk_Encoding(chunk.Chunk.Encoding()),
Data: chunk.Chunk.Bytes(),
})
frameBytesLeft -= chks[len(chks)-1].Size()
// We are fine with minor inaccuracy of max bytes per frame. The inaccuracy will be max of full chunk size.
isNext = iter.Next()
if frameBytesLeft > 0 && isNext {
continue
}
resp := &prompb.ChunkedReadResponse{
ChunkedSeries: []*prompb.ChunkedSeries{
{Labels: labels, Chunks: chks},
},
QueryIndex: int64(idx),
}
b, err := proto.Marshal(resp)
if err != nil {
t.Fatalf("error marshal response: %s", err)
}
if _, err := stream.Write(b); err != nil {
t.Fatalf("error write to stream: %s", err)
}
chks = chks[:0]
rrs.storage.Reset()
}
if err := iter.Err(); err != nil {
t.Fatalf("error iterate over chunk series: %s", err)
}
}
}
})
}
func validateReadHeaders(t *testing.T, r *http.Request) bool {
if r.Method != http.MethodPost {
t.Fatalf("got %q method, expected %q", r.Method, http.MethodPost)
}
if r.Header.Get("Content-Encoding") != "snappy" {
t.Fatalf("got %q content encoding header, expected %q", r.Header.Get("Content-Encoding"), "snappy")
}
if r.Header.Get("Content-Type") != "application/x-protobuf" {
t.Fatalf("got %q content type header, expected %q", r.Header.Get("Content-Type"), "application/x-protobuf")
}
remoteReadVersion := r.Header.Get("X-Prometheus-Remote-Read-Version")
if remoteReadVersion == "" {
t.Fatalf("got empty prometheus remote read header")
}
if !strings.HasPrefix(remoteReadVersion, "0.1.") {
t.Fatalf("wrong remote version defined")
}
return true
}
func validateStreamReadHeaders(t *testing.T, r *http.Request) bool {
if r.Method != http.MethodPost {
t.Fatalf("got %q method, expected %q", r.Method, http.MethodPost)
}
if r.Header.Get("Content-Encoding") != "snappy" {
t.Fatalf("got %q content encoding header, expected %q", r.Header.Get("Content-Encoding"), "snappy")
}
if r.Header.Get("Content-Type") != "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" {
t.Fatalf("got %q content type header, expected %q", r.Header.Get("Content-Type"), "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse")
}
remoteReadVersion := r.Header.Get("X-Prometheus-Remote-Read-Version")
if remoteReadVersion == "" {
t.Fatalf("got empty prometheus remote read header")
}
if !strings.HasPrefix(remoteReadVersion, "0.1.") {
t.Fatalf("wrong remote version defined")
}
return true
}
func GenerateRemoteReadSeries(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries {
var ts []*prompb.TimeSeries
j := 0
for i := 0; i < int(numOfSeries); i++ {
if i%3 == 0 {
j++
}
timeSeries := prompb.TimeSeries{
Labels: []prompb.Label{
{Name: labels.MetricName, Value: fmt.Sprintf("vm_metric_%d", j)},
{Name: "job", Value: strconv.Itoa(i)},
},
}
ts = append(ts, &timeSeries)
}
for i := range ts {
ts[i].Samples = generateRemoteReadSamples(i, start, end, numOfSamples)
}
return ts
}
func generateRemoteReadSamples(idx int, startTime, endTime, numOfSamples int64) []prompb.Sample {
samples := make([]prompb.Sample, 0)
delta := (endTime - startTime) / numOfSamples
t := startTime
for t != endTime {
v := 100 * int64(idx)
samples = append(samples, prompb.Sample{
Timestamp: t * 1000,
Value: float64(v),
})
t = t + delta
}
return samples
}
type MockStorage struct {
query *prompb.Query
store []*prompb.TimeSeries
}
func NewMockStorage(series []*prompb.TimeSeries) *MockStorage {
return &MockStorage{store: series}
}
func (ms *MockStorage) Read(_ context.Context, query *prompb.Query) (*prompb.QueryResult, error) {
if ms.query != nil {
return nil, fmt.Errorf("expected only one call to remote client got: %v", query)
}
ms.query = query
q := &prompb.QueryResult{Timeseries: make([]*prompb.TimeSeries, 0, len(ms.store))}
for _, s := range ms.store {
var samples []prompb.Sample
for _, sample := range s.Samples {
if sample.Timestamp >= query.StartTimestampMs && sample.Timestamp < query.EndTimestampMs {
samples = append(samples, sample)
}
}
var series prompb.TimeSeries
if len(samples) > 0 {
series.Labels = s.Labels
series.Samples = samples
}
q.Timeseries = append(q.Timeseries, &series)
}
return q, nil
}
func (ms *MockStorage) Reset() {
ms.query = nil
}
func labelsToLabelsProto(labels labels.Labels) []prompb.Label {
result := make([]prompb.Label, 0, len(labels))
for _, l := range labels {
result = append(result, prompb.Label{
Name: l.Name,
Value: l.Value,
})
}
return result
}

View File

@@ -0,0 +1,86 @@
package remote_read_integration
import (
"bufio"
"net/http"
"net/http/httptest"
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
)
type RemoteWriteServer struct {
server *httptest.Server
series []vm.TimeSeries
}
// NewRemoteWriteServer prepares test remote write server
func NewRemoteWriteServer(t *testing.T) *RemoteWriteServer {
rws := &RemoteWriteServer{series: make([]vm.TimeSeries, 0)}
mux := http.NewServeMux()
mux.Handle("/api/v1/import", rws.getWriteHandler(t))
mux.Handle("/health", rws.handlePing())
rws.server = httptest.NewServer(mux)
return rws
}
// Close closes the server.
func (rws *RemoteWriteServer) Close() {
rws.server.Close()
}
func (rws *RemoteWriteServer) ExpectedSeries(series []vm.TimeSeries) {
rws.series = append(rws.series, series...)
}
func (rws *RemoteWriteServer) URL() string {
return rws.server.URL
}
func (rws *RemoteWriteServer) getWriteHandler(t *testing.T) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var tss []vm.TimeSeries
scanner := bufio.NewScanner(r.Body)
var rows parser.Rows
for scanner.Scan() {
rows.Unmarshal(scanner.Text())
for _, row := range rows.Rows {
var labelPairs []vm.LabelPair
var ts vm.TimeSeries
nameValue := ""
for _, tag := range row.Tags {
if string(tag.Key) == "__name__" {
nameValue = string(tag.Value)
continue
}
labelPairs = append(labelPairs, vm.LabelPair{Name: string(tag.Key), Value: string(tag.Value)})
}
ts.Values = append(ts.Values, row.Values...)
ts.Timestamps = append(ts.Timestamps, row.Timestamps...)
ts.Name = nameValue
ts.LabelPairs = labelPairs
tss = append(tss, ts)
}
rows.Reset()
}
if !reflect.DeepEqual(tss, rws.series) {
w.WriteHeader(http.StatusInternalServerError)
t.Fatalf("datasets not equal, expected: %#v; \n got: %#v", rws.series, tss)
return
}
w.WriteHeader(http.StatusNoContent)
})
}
func (rws *RemoteWriteServer) handlePing() http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OK"))
})
}

25
app/vmctl/utils/tls.go Normal file
View File

@@ -0,0 +1,25 @@
package utils
import (
"crypto/tls"
"net/http"
"strings"
)
// Transport creates http.Transport object based on provided URL.
// Returns Transport with TLS configuration if URL contains `https` prefix
func Transport(URL string, insecureSkipVerify bool) *http.Transport {
t := http.DefaultTransport.(*http.Transport).Clone()
if !strings.HasPrefix(URL, "https") {
return t
}
t.TLSClientConfig = TLSConfig(insecureSkipVerify)
return t
}
// TLSConfig creates tls.Config object from provided arguments
func TLSConfig(insecureSkipVerify bool) *tls.Config {
return &tls.Config{
InsecureSkipVerify: insecureSkipVerify,
}
}

View File

@@ -182,6 +182,8 @@ func (im *Importer) Errors() chan *ImportError { return im.errors }
// that need to be imported
func (im *Importer) Input(ts *TimeSeries) error {
select {
case <-im.close:
return fmt.Errorf("importer is closed")
case im.input <- ts:
return nil
case err := <-im.errors:
@@ -197,6 +199,7 @@ func (im *Importer) Input(ts *TimeSeries) error {
func (im *Importer) Close() {
im.once.Do(func() {
close(im.close)
close(im.input)
im.wg.Wait()
close(im.errors)
})
@@ -209,6 +212,10 @@ func (im *Importer) startWorker(bar *pb.ProgressBar, batchSize, significantFigur
for {
select {
case <-im.close:
for ts := range im.input {
ts = roundTimeseriesValue(ts, significantFigures, roundDigits)
batch = append(batch, ts)
}
exitErr := &ImportError{
Batch: batch,
}
@@ -217,24 +224,17 @@ func (im *Importer) startWorker(bar *pb.ProgressBar, batchSize, significantFigur
}
im.errors <- exitErr
return
case ts := <-im.input:
case ts, ok := <-im.input:
if !ok {
continue
}
// init waitForBatch when first
// value was received
if waitForBatch.IsZero() {
waitForBatch = time.Now()
}
if significantFigures > 0 {
for i, v := range ts.Values {
ts.Values[i] = decimal.RoundToSignificantFigures(v, significantFigures)
}
}
if roundDigits < 100 {
for i, v := range ts.Values {
ts.Values[i] = decimal.RoundToDecimalDigits(v, roundDigits)
}
}
ts = roundTimeseriesValue(ts, significantFigures, roundDigits)
batch = append(batch, ts)
dataPoints += len(ts.Values)
@@ -418,3 +418,18 @@ func byteCountSI(b int64) string {
return fmt.Sprintf("%.1f %cB",
float64(b)/float64(div), "kMGTPE"[exp])
}
func roundTimeseriesValue(ts *TimeSeries, significantFigures, roundDigits int) *TimeSeries {
if significantFigures > 0 {
for i, v := range ts.Values {
ts.Values[i] = decimal.RoundToSignificantFigures(v, significantFigures)
}
}
if roundDigits < 100 {
for i, v := range ts.Values {
ts.Values[i] = decimal.RoundToDecimalDigits(v, roundDigits)
}
}
return ts
}

View File

@@ -2,6 +2,7 @@ package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
@@ -19,8 +20,9 @@ type vmNativeProcessor struct {
filter filter
rateLimit int64
dst *vmNativeClient
src *vmNativeClient
dst *vmNativeClient
src *vmNativeClient
interCluster bool
}
type vmNativeClient struct {
@@ -49,15 +51,16 @@ func (f filter) String() string {
}
const (
nativeExportAddr = "api/v1/export/native"
nativeImportAddr = "api/v1/import/native"
nativeExportAddr = "api/v1/export/native"
nativeImportAddr = "api/v1/import/native"
nativeTenantsAddr = "admin/tenants"
nativeBarTpl = `Total: {{counters . }} {{ cycle . "↖" "↗" "↘" "↙" }} Speed: {{speed . }} {{string . "suffix"}}`
)
func (p *vmNativeProcessor) run(ctx context.Context) error {
if p.filter.chunk == "" {
return p.runSingle(ctx, p.filter)
return p.runWithFilter(ctx, p.filter)
}
startOfRange, err := time.Parse(time.RFC3339, p.filter.timeStart)
@@ -89,7 +92,7 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {
timeStart: formattedStartTime,
timeEnd: formattedEndTime,
}
err := p.runSingle(ctx, f)
err := p.runWithFilter(ctx, f)
if err != nil {
log.Printf("processing failed for range %d/%d: %s - %s \n", rangeIdx+1, len(ranges), formattedStartTime, formattedEndTime)
@@ -99,25 +102,52 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {
return nil
}
func (p *vmNativeProcessor) runSingle(ctx context.Context, f filter) error {
pr, pw := io.Pipe()
func (p *vmNativeProcessor) runWithFilter(ctx context.Context, f filter) error {
nativeImportAddr, err := vm.AddExtraLabelsToImportPath(nativeImportAddr, p.dst.extraLabels)
log.Printf("Initing export pipe from %q with filters: %s\n", p.src.addr, f)
exportReader, err := p.exportPipe(ctx, f)
if err != nil {
return fmt.Errorf("failed to add labels to import path: %s", err)
}
if !p.interCluster {
srcURL := fmt.Sprintf("%s/%s", p.src.addr, nativeExportAddr)
dstURL := fmt.Sprintf("%s/%s", p.dst.addr, nativeImportAddr)
return p.runSingle(ctx, f, srcURL, dstURL)
}
tenants, err := p.getSourceTenants(ctx, f)
if err != nil {
return fmt.Errorf("failed to get source tenants: %s", err)
}
log.Printf("Discovered tenants: %v", tenants)
for _, tenant := range tenants {
// src and dst expected formats: http://vminsert:8480/ and http://vmselect:8481/
srcURL := fmt.Sprintf("%s/select/%s/prometheus/%s", p.src.addr, tenant, nativeExportAddr)
dstURL := fmt.Sprintf("%s/insert/%s/prometheus/%s", p.dst.addr, tenant, nativeImportAddr)
if err := p.runSingle(ctx, f, srcURL, dstURL); err != nil {
return fmt.Errorf("failed to migrate data for tenant %q: %s", tenant, err)
}
}
return nil
}
func (p *vmNativeProcessor) runSingle(ctx context.Context, f filter, srcURL, dstURL string) error {
log.Printf("Initing export pipe from %q with filters: %s\n", srcURL, f)
exportReader, err := p.exportPipe(ctx, srcURL, f)
if err != nil {
return fmt.Errorf("failed to init export pipe: %s", err)
}
nativeImportAddr, err := vm.AddExtraLabelsToImportPath(nativeImportAddr, p.dst.extraLabels)
if err != nil {
return err
}
pr, pw := io.Pipe()
sync := make(chan struct{})
go func() {
defer func() { close(sync) }()
u := fmt.Sprintf("%s/%s", p.dst.addr, nativeImportAddr)
req, err := http.NewRequestWithContext(ctx, "POST", u, pr)
req, err := http.NewRequestWithContext(ctx, "POST", dstURL, pr)
if err != nil {
log.Fatalf("cannot create import request to %q: %s", p.dst.addr, err)
}
@@ -130,7 +160,7 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f filter) error {
}
}()
fmt.Printf("Initing import process to %q:\n", p.dst.addr)
fmt.Printf("Initing import process to %q:\n", dstURL)
pool := pb.NewPool()
bar := pb.ProgressBarTemplate(nativeBarTpl).New(0)
pool.Add(bar)
@@ -166,9 +196,43 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f filter) error {
return nil
}
func (p *vmNativeProcessor) exportPipe(ctx context.Context, f filter) (io.ReadCloser, error) {
u := fmt.Sprintf("%s/%s", p.src.addr, nativeExportAddr)
func (p *vmNativeProcessor) getSourceTenants(ctx context.Context, f filter) ([]string, error) {
u := fmt.Sprintf("%s/%s", p.src.addr, nativeTenantsAddr)
req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
if err != nil {
return nil, fmt.Errorf("cannot create request to %q: %s", u, err)
}
params := req.URL.Query()
if f.timeStart != "" {
params.Set("start", f.timeStart)
}
if f.timeEnd != "" {
params.Set("end", f.timeEnd)
}
req.URL.RawQuery = params.Encode()
resp, err := p.src.do(req, http.StatusOK)
if err != nil {
return nil, fmt.Errorf("tenants request failed: %s", err)
}
var r struct {
Tenants []string `json:"data"`
}
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
return nil, fmt.Errorf("cannot decode tenants response: %s", err)
}
if err := resp.Body.Close(); err != nil {
return nil, fmt.Errorf("cannot close tenants response body: %s", err)
}
return r.Tenants, nil
}
func (p *vmNativeProcessor) exportPipe(ctx context.Context, url string, f filter) (io.ReadCloser, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, fmt.Errorf("cannot create request to %q: %s", p.src.addr, err)
}

View File

@@ -171,6 +171,41 @@ curl 'http://localhost:8431/api/v1/labels' -H 'Authorization: Bearer eyJhbGciOiJ
# check rate limit
```
## JWT signature verification
`vmgateway` supports JWT signature verification.
Supported algorithms are `RS256`, `RS384`, `RS512`, `ES256`, `ES384`, `ES512`, `PS256`, `PS384`, `PS512`.
Tokens with unsupported algorithms will be rejected.
In order to enable JWT signature verification, you need to specify keys for signature verification.
The following flags are used to specify keys:
- `-auth.publicKeyFiles` - allows to pass file path to file with public key.
- `-auth.publicKeys` - allows to pass public key directly.
Note that both flags support passing multiple keys and also can be used together.
Example usage:
```console
./bin/vmgateway -eula \
-enable.auth \
-write.url=http://localhost:8480 \
-read.url=http://localhost:8481 \
-auth.publicKeyFiles=public_key.pem \
-auth.publicKeyFiles=public_key2.pem \
-auth.publicKeys=`-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1SU1LfVLPHCozMxH2Mo
4lgOEePzNm0tRgeLezV6ffAt0gunVTLw7onLRnrq0/IzW7yWR7QkrmBL7jTKEn5u
+qKhbwKfBstIs+bMY2Zkp18gnTxKLxoS2tFczGkPLPgizskuemMghRniWaoLcyeh
kd3qqGElvW/VDL5AaWTg0nLVkjRo9z+40RQzuVaE8AkAFmxZzow3x+VJYKdjykkJ
0iT9wCS0DRTXu269V264Vf/3jvredZiKRkgwlL9xNAwxXFg0x/XFw005UWVRIkdg
cKWTjpBP2dPwVZ4WWC+9aGVd+Gyn1o0CLelf4rEjGoXbAAEgAqeGUxrcIlbjXfbc
mwIDAQAB
-----END PUBLIC KEY-----
`
```
This command will result in 3 keys loaded: 2 keys from files and 1 from command line.
## Configuration
The shortlist of configuration flags include the following:
@@ -178,6 +213,12 @@ The shortlist of configuration flags include the following:
```console
-auth.httpHeader string
HTTP header name to look for JWT authorization token (default "Authorization")
-auth.publicKeyFiles array
Path file with public key to verify JWT token signature
Supports an array of values separated by comma or specified via multiple flags.
-auth.publicKeys array
Public keys to verify JWT token signature
Supports an array of values separated by comma or specified via multiple flags.
-clusterMode
enable this for the cluster version
-datasource.appendTypePrefix
@@ -229,7 +270,7 @@ The shortlist of configuration flags include the following:
-datasource.tlsServerName string
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
-datasource.url string
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also '-datasource.disablePathAppend', '-datasource.showURL'.
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL
-enable.auth
enables auth with jwt token
-enable.rateLimit
@@ -263,13 +304,19 @@ The shortlist of configuration flags include the following:
-httpAuth.username string
Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
TCP address to listen for http connections (default ":8431")
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8431")
-httpListenAddr.useProxyProtocol
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
-internStringMaxLen int
The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
@@ -280,7 +327,7 @@ The shortlist of configuration flags include the following:
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-memory.allowedBytes size
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
@@ -336,7 +383,7 @@ The shortlist of configuration flags include the following:
## Limitations
* Access Control:
* `jwt` token must be validated by external system, currently `vmgateway` can't validate the signature.
* `jwt` token signature verification for `HMAC` algorithms is not supported.
* RateLimiting:
* limits applied based on queries to `datasource.url`
* only cluster version can be rate-limited.

View File

@@ -19,7 +19,10 @@ type InsertCtx struct {
mrs []storage.MetricRow
metricNamesBuf []byte
relabelCtx relabel.Ctx
relabelCtx relabel.Ctx
streamAggrCtx streamAggrCtx
skipStreamAggr bool
}
// Reset resets ctx for future fill with rowsLen rows.
@@ -42,6 +45,8 @@ func (ctx *InsertCtx) Reset(rowsLen int) {
ctx.mrs = ctx.mrs[:0]
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
ctx.relabelCtx.Reset()
ctx.streamAggrCtx.Reset()
ctx.skipStreamAggr = false
}
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
@@ -132,6 +137,16 @@ func (ctx *InsertCtx) ApplyRelabeling() {
// FlushBufs flushes buffered rows to the underlying storage.
func (ctx *InsertCtx) FlushBufs() error {
if sa != nil && !ctx.skipStreamAggr {
ctx.streamAggrCtx.push(ctx.mrs)
if !*streamAggrKeepInput {
ctx.Reset(0)
return nil
}
}
// There is no need in limiting the number of concurrent calls to vmstorage.AddRows() here,
// since the number of concurrent FlushBufs() calls should be already limited via writeconcurrencylimiter
// used at every ParseStream() call under lib/protoparser/*/streamparser.go
err := vmstorage.AddRows(ctx.mrs)
ctx.Reset(0)
if err == nil {

View File

@@ -0,0 +1,121 @@
package common
import (
"flag"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
)
var (
streamAggrConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/stream-aggregation.html . "+
"See also -remoteWrite.streamAggr.keepInput and -streamAggr.dedupInterval")
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples after the aggregation with -streamAggr.config. "+
"By default the input is dropped after the aggregation, so only the aggregate data is stored. "+
"See https://docs.victoriametrics.com/stream-aggregation.html")
streamAggrDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
)
// InitStreamAggr must be called after flag.Parse and before using the common package.
//
// MustStopStreamAggr must be called when stream aggr is no longer needed.
func InitStreamAggr() {
if *streamAggrConfig == "" {
// Nothing to initialize
return
}
a, err := streamaggr.LoadFromFile(*streamAggrConfig, pushAggregateSeries, *streamAggrDedupInterval)
if err != nil {
logger.Fatalf("cannot load -streamAggr.config=%q: %s", *streamAggrConfig, err)
}
sa = a
}
// MustStopStreamAggr stops stream aggregators.
func MustStopStreamAggr() {
sa.MustStop()
sa = nil
}
var sa *streamaggr.Aggregators
type streamAggrCtx struct {
mn storage.MetricName
tss [1]prompbmarshal.TimeSeries
}
func (ctx *streamAggrCtx) Reset() {
ctx.mn.Reset()
ts := &ctx.tss[0]
promrelabel.CleanLabels(ts.Labels)
}
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow) {
mn := &ctx.mn
tss := ctx.tss[:]
ts := &tss[0]
labels := ts.Labels
samples := ts.Samples
for _, mr := range mrs {
if err := mn.UnmarshalRaw(mr.MetricNameRaw); err != nil {
logger.Panicf("BUG: cannot unmarshal recently marshaled MetricName: %s", err)
}
labels = append(labels[:0], prompbmarshal.Label{
Name: "__name__",
Value: bytesutil.ToUnsafeString(mn.MetricGroup),
})
for _, tag := range mn.Tags {
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
samples = append(samples[:0], prompbmarshal.Sample{
Timestamp: mr.Timestamp,
Value: mr.Value,
})
ts.Labels = labels
ts.Samples = samples
sa.Push(tss)
}
}
func pushAggregateSeries(tss []prompbmarshal.TimeSeries) {
currentTimestamp := int64(fasttime.UnixTimestamp()) * 1000
var ctx InsertCtx
ctx.Reset(len(tss))
ctx.skipStreamAggr = true
for _, ts := range tss {
labels := ts.Labels
for _, label := range labels {
name := label.Name
if name == "__name__" {
name = ""
}
ctx.AddLabel(name, label.Value)
}
value := ts.Samples[0].Value
if err := ctx.WriteDataPoint(nil, ctx.Labels, currentTimestamp, value); err != nil {
logger.Errorf("cannot store aggregate series: %s", err)
// Do not continue pushing the remaining samples, since it is likely they will return the same error.
return
}
}
// There is no need in limiting the number of concurrent calls to vmstorage.AddRows() here,
// since the number of concurrent pushAggregateSeries() calls should be already limited by lib/streamaggr.
if err := vmstorage.AddRows(ctx.mrs); err != nil {
logger.Errorf("cannot flush aggregate series: %s", err)
}
}

Some files were not shown because too many files have changed in this diff Show More