Compare commits

...

565 Commits

Author SHA1 Message Date
Nikolay
08ff9be0e4 app/vminsert: add clusternative.tls configuration flags
Previously, it was not possible to configure mTLS between multi-level
vminserts. But vmselect supported such feature. It was a configuration
discrepancy.

This commit adds the same flags to the vminsert.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10958
2026-05-20 12:38:13 +02:00
Stephan Burns
47a3124d70 docs/enterprise.md: add clarification for when license expires (#10956)
Many people are concerned about what happens when the license expires on
a VictoriaMetrics component. This PR aims to address those concerns.

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10956

---------

Signed-off-by: Stephan Burns <34520077+Sleuth56@users.noreply.github.com>
Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Pablo Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-19 17:52:13 +03:00
Rudransh Shrivastava
50c11dd7d9 .github: pin actions to full-length commit SHAs (#10953)
Pin GitHub actions to their full-length commit SHAs.
Semver tags were updated to be more precise: e.g. `v7` to `v7.0.0`

---------

Signed-off-by: Rudransh Shrivastava <rudransh@victoriametrics.com>
2026-05-19 17:34:33 +03:00
Victoria Nduka
ddfdc9cd9d docs/quick-start.md: fix punctuation and grammar in Quick-Start.md (#10957)
Fix punctuation and grammar in Quick-Start.md

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10957

Signed-off-by: Victoria Nduka <122698422+nwanduka@users.noreply.github.com>
2026-05-19 15:01:17 +03:00
Phuong Le
01aa986418 vmagent/remotewrite: mark remoteWrite.proxyURL as secret
This change marks `-remoteWrite.proxyURL` as a secret flag in vmagent
remotewrite, similarly to other sensitive remote write flags.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10971/
2026-05-19 13:48:31 +02:00
f41gh7
e7ae4c81f3 app/vmselect: hide vmalert.proxyURL as secret flag
follow-up for cc45a139db

This commit registers vmselect and vminsert `vmalert.proxyURL` flags as secret flags in the
same way as vmagent does.
2026-05-19 13:28:11 +02:00
f41gh7
358c29ea77 app/vmalert: hide *.headers as secret flags
follow-up for 33d8e02ea8

This commit registers vmalert `*.headers` flags as secret flags in the
same way as vmagent does.
2026-05-19 13:26:17 +02:00
Pablo (Tomas) Fernandez
d40182360e docs/vmagent.md: grammar and proofread pass for docs; structurize multitenancy doc for vmagent (#10946)
This is a grammar pass and proofread for
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10943

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
2026-05-19 14:24:43 +03:00
Max Kotliar
2e654ac0e9 app/vmagent: drain in-memory blocks to rw first during shutdown, fallback to persisted queue after (#10932)
vmagent would try to flush in-memory blocks to rw for the duration of graceful shutdown (5 seconds) only
after falling back and storing them in the persisted queue.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9996
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10932
2026-05-19 12:43:45 +03:00
Max Kotliar
15ddcf2db4 docs/changelog: add links to the issues/PRs 2026-05-18 19:33:16 +03:00
Max Kotliar
99abaea87e docs/changelog: fix link to the issue
follow-up on
e613c3fd6b
2026-05-18 19:23:08 +03:00
Max Kotliar
65240b9fec app/vmagent: fix sharding correctness when disableOnDiskQueue is set (#10947)
When -remoteWrite.shardByURL is enabled and one of the remote write
targets with -remoteWrite.disableOnDiskQueue set becomes blocked, samples
could be rerouted to other shards (see `getEligibleRemoteWriteCtxs` impl), breaking the sharding guarantee. Fix this by always using `rwctxsGlobal` in sharding mode.

Add a startup check that requires `-remoteWrite.disableOnDiskQueue` to be
configured uniformly across all targets when `-remoteWrite.shardByURL` is enabled.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10507
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10947
2026-05-18 14:46:47 +03:00
Immanuel Tikhonov
be74090554 chore: use %s instead of %w for error formatting in logger calls
Previously, errors in app/vmalert-tool and lib packages used the %w verb
in logger.Errorf calls, which is intended for wrapping errors via fmt.Errorf.
Using %w with the logger package does not wrap the error — instead, it prints
a malformed %!w(...) placeholder rather than the actual error message.

This commit replaces all affected occurrences of %w with %s to correctly
format and display errors.

Related PR: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10962
2026-05-18 13:14:58 +02:00
Phuong Le
0a549b1f4c vmagent/remotewrite: hide -remoteWrite.headers as a secret flag
This registers `remoteWrite.headers` in `InitSecretFlags()` so it is
masked by the existing secret-flag.

Without this, values passed via `-remoteWrite.headers` are exposed in
startup flag logs, /metrics, and /flags, because these paths only redact
flags recognized by `flagutil.IsSecretFlag()`.

The change keeps the existing `-remoteWrite.showURL` behavior for
`remoteWrite.url`, while always treating `-remoteWrite.headers` as
secret.
2026-05-18 12:43:04 +02:00
f41gh7
4bd1515d50 app/vmstorage: properly init vminsert server mtls connection
Release v1.130 added a regression into enterprise vmstorage version.
Server configuration for vminsert listener was initialized without mtls
configuration args. It made the vminsert to vmstorage mtls
connection impossible.

 This commit fixes the regression and adds integration tests to verify
 it.

Related to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10958
2026-05-18 12:17:35 +02:00
Fred Navruzov
e5f6c95cff docs/vmanomaly: release v1.29.4 (#10963)
Update anomaly detection docs to release `v1.29.4`
2026-05-16 09:55:29 +03:00
Aliaksandr Valialkin
94cd662f6b docs/victoriametrics/Articles.md: return back a link to the Monitoring K8s with Victoriametrics talk from ZERODHA
The outdated link to the slides for this talk has been dropped in the commit f0a147fdf7 .
The video recording for the talk is still available at YouTube ( https://www.youtube.com/watch?v=ZJQYW-cFOms ),
so put it to the articles page.
2026-05-14 16:57:22 +02:00
Aliaksandr Valialkin
d3e33f5cae docs/victoriametrics/enterprise.md: mention about VictoriaTraces
Enterprise version of VictoriaTraces isn't available yet, but it is better to mention it
at the https://docs.victoriametrics.com/victoriametrics/enterprise/ page for the sake of consistency.

While at it, consistently use absolute links, even if they point to the same document.
This simplifies moving the text between docs without breaking the links.
2026-05-14 16:38:55 +02:00
Aliaksandr Valialkin
29eaf687c5 docs/victoriametrics/goals.md: clarify the development goals a bit 2026-05-14 16:38:53 +02:00
Roman Khavronenko
de3e79aba6 docs: structurize multitenancy doc for vmagent (#10943)
This change should clearly distinguish different multitenancy scenarios
for vmagent. It is expected to be easier to read and follow for users.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Pablo Fernandez <46322567+TomFern@users.noreply.github.com>
(cherry picked from commit 5f5a2109e8)
2026-05-13 13:08:13 +02:00
June
7eba836ec2 app/{vmalert,vmagent}: add basicAuth.usernameFile CLI flags
The core `lib/promauth` already supports `usernameFile`
configs, but the CLI flags for vmagent remotewrite and vmalert
datasource/remotewrite/remoteread/notifier only expose
`basicAuth.username`.

This commit adds the corresponding `basicAuth.usernameFile` flags to match
the existing `basicAuth.passwordFile` pattern, closing the gap between
YAML and CLI configuration.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9436
2026-05-12 22:58:39 +02:00
Zhu Jiekun
9ed21f1e17 lib/memory: adds memory.allowedBytes warning message
This commit adds a warning message, if `-memory.allowedBytes` has value less than 1MB.

 It should help to debug possible issues, if there is a problem with app start-up due to low memory limit.

 For example, fastcache could panic at `-memory.allowedBytes=`

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10935
2026-05-12 22:58:39 +02:00
Hui Wang
74c0b492ba app/vmalert: support sending data to -remoteWrite.url via zstd
In most cases, vmalert is configured to write to vm components like
vminsert or vmagent, using VictoriaMetrics remote write protocol can
save network bandwidth.
The VictoriaMetrics remote write protocol is used by default, and the
protocol is downgraded from VictoriaMetrics to Prometheus remote write
if one request fails with protocol error.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10929
2026-05-12 22:58:38 +02:00
Max Kotliar
0fed267f9a docs: use git worktrees instead of checkout for updating flags in docs
Replace the pattern of `git checkout <tag> && make <binary>` with `git
worktree add /tmp/vm-* <tag>` so that flag updates no longer switch the
working tree of the current repository. Each variant (opensource,
enterprise, cluster) gets its own worktree, removing the need to restore
the original branch between steps.

Also normalize dynamic default values in vmctl prometheus flags
(-prom-tmp-dir-path) to `os.TempDir()` to reduce noisy diffs caused by
machine-specific temp paths.
2026-05-12 19:39:44 +03:00
Max Kotliar
79769e2300 app/vmui: improve Top Queries table UX (#10790)
- Add tooltip support to column headers with info icons, explaining what
each column is for.
- Format duration columns using humanizeSeconds instead of raw seconds
- Format memory column with human-readable units (B/KB/MB/GB/TB)
- Shorten column titles ("sum duration, sec" → "duration", "query time
interval" → "range", "avg memory usage, bytes" → "memory")
- Show "instant" for queries with no time range instead of empty value

Before:
<img width="1512" height="863" alt="Screenshot 2026-05-11 at 21 28 49"
src="https://github.com/user-attachments/assets/4e4dc67c-d121-4ecc-974f-3e1e9e28f3b7"
/>

After:
<img width="1512" height="862" alt="Screenshot 2026-05-11 at 21 28 21"
src="https://github.com/user-attachments/assets/89b21e58-a2c4-44d4-8806-a72e9f1555f3"
/>

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10790

---------

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Yury Moladau <yurymolodov@gmail.com>
2026-05-12 18:58:03 +03:00
Uğur Tafralı
0604077b5c lib/backup/fslocal: remove trailing slash in provided directory (#10825)
Trailing slash in -storageDataPath was causing vmrestore to panic. The fix calls filepath.Clean() in Init() to normalize the path. Added a test to verify ListParts works correctly with a trailing slash.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10823
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10825

---------

Signed-off-by: JAYICE <jayice.zhou@qq.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-12 18:12:11 +03:00
Max Kotliar
8acfd53e53 docs: rename invalid -snapshot.createUrl flag to -snapshot.createURL
I copy pasted the flag from the doc and it did not work. Turned out that
the flag has to be -snapshot.createURL.
2026-05-12 18:09:04 +03:00
Artem Fetishev
a52a0ddf2e Refactor apptests (#10940)
Fixes #10938.

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-05-12 16:26:15 +02:00
Max Kotliar
57924f4167 app/vmagent: fix rare hash collision in getLabelsHash (#10937)
Add '=' separator between label name and value when computing the hash
to prevent false collisions, like {a="bc"} and {ab="c"} hashing to the
same value.

getLabelsHashForShard is added to avoid sharding disruptions in vmagent
(-remoteWrite.shardByURL=true mode). The function preserves previous
behavior, without '=' between name and value.

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10937
2026-05-12 15:43:09 +03:00
andriibeee
8948cd7738 lib/protoparser: add flag to allow OpenTelemetry underscore labels to pass through without being prefixed (#10475)
Add `-opentelemetry.labelNameUnderscoreSanitization` command-line flag to control whether to enable prepending of `key` to labels starting with `_` when `-opentelemetry.usePrometheusNaming` is enabled. The labels starting with `__` are not modified.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9663

Signed-off-by: andriibeee <154226341+andriibeee@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-12 15:18:02 +03:00
Max Kotliar
33373f85a9 .github: follow-up on prev commit abff93cf53 2026-05-12 14:46:24 +03:00
Max Kotliar
0fc863b991 .github: add netbsd build (#10941) 2026-05-12 14:43:10 +03:00
Max Kotliar
083a78091f docs/vmauth: use canonical link 2026-05-11 14:45:52 +03:00
Andrii Chubatiuk
df1f58b017 lib/streamaggr: use max samples lag for flush delay calculation (#10835)
### Describe Your Changes

fixes #10402

use max sample lag for flush delay calculation when aggregation windows
enabled. before 95th percentile of samples lag was used, which led to
dropped data

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit e7c46a0f4c)
2026-05-11 13:25:05 +02:00
Alexei Margasov
29a5c914c8 lib/streamaggr: fix stale quantiles output (#10918)
### Describe Your Changes

Fix stale `quantiles(...)` stream aggregation output for series without
samples in the current aggregation interval.

Previously, `quantilesAggrConfig` reused the `quantiles` buffer across
aggregation values. If `quantilesAggrValue.flush` was called for a
series without samples after another series had already calculated
quantiles, the stale quantile
values could be emitted for the empty series.

This could produce unrealistic `*_quantiles` output values and make the
same aggregated value appear across unrelated labelsets.

The PR skips `quantiles(...)` output when there is no histogram for the
current interval and adds a regression test for this case.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 20d4314168)
2026-05-11 13:25:05 +02:00
Roman Khavronenko
221c88797b lib/streamaggr: update sync tests (#10939)
synctest runs inner closure in a new goroutine, which makes `t.Helper` instruction
useless on `t.Fatalf` checks. So when test fails we observe the log line where `t.Fatalf`
was called, instead of where `f()` was called.

Moving checks out of synctest closure makes `t.Helper` useful again.

--

In the synctest we were waiting for ingest a new batch of samples for aggregation interval.
Because of this, the new batch had a 50% chance to be ingested in the previous or current
aggregation interval, depending on whether the Go runtime had initiated the flush() call or not.

This change waits for additional 1ms for flush to happen. Locally, it stopped producing
flaky tests.
---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit b30c307bbb)
2026-05-11 13:25:04 +02:00
f41gh7
001d93cf29 docs: update flags with actual v1.143.0 binaries
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-11 11:21:54 +02:00
f41gh7
487ac00e95 docs: bump version to v1.143.0
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-11 11:20:12 +02:00
f41gh7
0de6957062 deployment/docker: bump version to v1.143.0
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-11 11:19:41 +02:00
f41gh7
9f576dbd2c docs: forward port LTS v1.122.22 changelog to upstream
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-11 11:14:32 +02:00
f41gh7
ddf35f69bb docs: forward port LTS v1.136.9 changelog to upstream
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-11 11:13:59 +02:00
f41gh7
f44e6cda5d docs/changelog: cut release v1.143.0
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-08 13:46:04 +02:00
f41gh7
3701870855 docs: update version to v1.143.0
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-05-08 13:45:23 +02:00
f41gh7
0a74ffee1e make vmui-update 2026-05-08 13:38:40 +02:00
f41gh7
1e56467e01 vendor: update github.com/prometheus/prometheus 2026-05-08 13:38:40 +02:00
Nikolay
90695f3efa app/vmselect/searchutil: prioritize URL query params over form values
When a request contains both URL path query params and POST form values
for extra_label and extra_filters[], URL query params now take
precedence. This resolves the conflict between the two sources and
simplifies security enforcement for extra_label/extra_filters policies
via vmauth or any other http proxy.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10908
2026-05-08 10:12:47 +02:00
Max Kotliar
af0e1456a1 docs/integrations: add available_from placeholder for native histogram feature
Follow up on
76e0bcdf45
2026-05-08 10:57:25 +03:00
f41gh7
fab4726509 deployment/docker: update Go builder from Go1.26.2 to Go1.26.3
See https://github.com/golang/go/issues?q=milestone%3AGo1.26.3%20label%3ACherryPickApproved
2026-05-08 09:31:44 +02:00
f41gh7
cd6e1987e8 lib/fs: introduce new metric for Filesystem type name
This commit introduces a new metric to expose fs type for the provided path.

 For example:
```
vm_fs_info{path="/vmstorage-data", fs_type="xfs"}
```

 Path must be registered with new method `fs.RegisterPathFsMetrics`.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482
2026-05-08 09:22:42 +02:00
Max Kotliar
bedf1a039e docs/changelog: fix order - first features, then bugs.
For some reason bugs were the first.
2026-05-07 21:17:05 +03:00
Kirill Yurkov
f72c449f61 dashboards: Add Kafka (Enterprise) row to vmagent dashboard (#10728)
Add a new `Kafka (Enterprise)` row to both vmagent dashboards:

- `dashboards/vmagent.json`
- `dashboards/vm/vmagent.json`

The row is placed before `Drilldown` and contains three Kafka-specific
panels:

- `Kafka bytes`
- `Kafka messages in/out`
- `Kafka and consumer errors`

The goal is to provide a compact Kafka-focused view for enterprise
vmagent deployments without duplicating the existing generic remote
write panels such as connection saturation and persistent queue size.

The new row helps distinguish:

- producer vs consumer throughput at the Kafka topic level
- message-rate shifts that may indicate smaller Kafka payloads and
higher per-message overhead
- producer-side Kafka errors vs consumer-side Kafka errors

Descriptions include links to the relevant Kafka documentation sections.

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10728

---------

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-07 21:15:39 +03:00
andriibeee
1f451603f7 app/vmselect: set CORS headers on /api/v1/export endpoints (#10900)
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10899
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10900

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-07 21:05:05 +03:00
JAYICE
73eefc3c4c app/vmauth: pick first backend to process request when all backends are unavailable (#10886)
The commit restores the previous behavior where the first backend is still selected and the request is sent to it. This behavior existed before commit 9c36f0931a, but was later changed to return no backends. Hence, vmauth would reject all requests for the next 3s if all backends are unavailable. In some rare cases, it leads to an increase in error responses. 

The commit restores the original behavior, adds comments explaining why it is important, and introduces tests covering the logic.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10837
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10886

---------

Signed-off-by: JAYICE <1185430411@qq.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: Hui Wang <haley@victoriametrics.com>
2026-05-07 20:45:48 +03:00
Hui Wang
8a6e8a95f0 lib/prompb: support prometheus native histogram during ingestion
This commit adds support for Prometheus Native Histogram https://prometheus.io/docs/specs/native_histograms data ingestion via Prometheus RemoteWrite format. It converts Native Histograms into VictoriaMetrics histogram format.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10743
2026-05-07 19:07:24 +02:00
dependabot[bot]
f75c59307c build(deps): bump github/codeql-action from 4.35.1 to 4.35.2 (#10921)
Bumps [github/codeql-action](https://github.com/github/codeql-action)
from 4.35.1 to 4.35.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/github/codeql-action/releases">github/codeql-action's
releases</a>.</em></p>
<blockquote>
<h2>v4.35.2</h2>
<ul>
<li>The undocumented TRAP cache cleanup feature that could be enabled
using the <code>CODEQL_ACTION_CLEANUP_TRAP_CACHES</code> environment
variable is deprecated and will be removed in May 2026. If you are
affected by this, we recommend disabling TRAP caching by passing the
<code>trap-caching: false</code> input to the <code>init</code> Action.
<a
href="https://redirect.github.com/github/codeql-action/pull/3795">#3795</a></li>
<li>The Git version 2.36.0 requirement for improved incremental analysis
now only applies to repositories that contain submodules. <a
href="https://redirect.github.com/github/codeql-action/pull/3789">#3789</a></li>
<li>Python analysis on GHES no longer extracts the standard library,
relying instead on models of the standard library. This should result in
significantly faster extraction and analysis times, while the effect on
alerts should be minimal. <a
href="https://redirect.github.com/github/codeql-action/pull/3794">#3794</a></li>
<li>Fixed a bug in the validation of OIDC configurations for private
registries that was added in CodeQL Action 4.33.0 / 3.33.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3807">#3807</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.2">2.25.2</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3823">#3823</a></li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/github/codeql-action/blob/main/CHANGELOG.md">github/codeql-action's
changelog</a>.</em></p>
<blockquote>
<h2>4.35.2 - 15 Apr 2026</h2>
<ul>
<li>The undocumented TRAP cache cleanup feature that could be enabled
using the <code>CODEQL_ACTION_CLEANUP_TRAP_CACHES</code> environment
variable is deprecated and will be removed in May 2026. If you are
affected by this, we recommend disabling TRAP caching by passing the
<code>trap-caching: false</code> input to the <code>init</code> Action.
<a
href="https://redirect.github.com/github/codeql-action/pull/3795">#3795</a></li>
<li>The Git version 2.36.0 requirement for improved incremental analysis
now only applies to repositories that contain submodules. <a
href="https://redirect.github.com/github/codeql-action/pull/3789">#3789</a></li>
<li>Python analysis on GHES no longer extracts the standard library,
relying instead on models of the standard library. This should result in
significantly faster extraction and analysis times, while the effect on
alerts should be minimal. <a
href="https://redirect.github.com/github/codeql-action/pull/3794">#3794</a></li>
<li>Fixed a bug in the validation of OIDC configurations for private
registries that was added in CodeQL Action 4.33.0 / 3.33.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3807">#3807</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.2">2.25.2</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3823">#3823</a></li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="95e58e9a2c"><code>95e58e9</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3824">#3824</a>
from github/update-v4.35.2-d2e135a73</li>
<li><a
href="6f31bfe060"><code>6f31bfe</code></a>
Update changelog for v4.35.2</li>
<li><a
href="d2e135a73a"><code>d2e135a</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3823">#3823</a>
from github/update-bundle/codeql-bundle-v2.25.2</li>
<li><a
href="60abb65df0"><code>60abb65</code></a>
Add changelog note</li>
<li><a
href="5a0a562209"><code>5a0a562</code></a>
Update default bundle to codeql-bundle-v2.25.2</li>
<li><a
href="65216971a1"><code>6521697</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3820">#3820</a>
from github/dependabot/github_actions/dot-github/wor...</li>
<li><a
href="3c45af2dd2"><code>3c45af2</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3821">#3821</a>
from github/dependabot/npm_and_yarn/npm-minor-345b93...</li>
<li><a
href="f1c339364c"><code>f1c3393</code></a>
Rebuild</li>
<li><a
href="1024fc496c"><code>1024fc4</code></a>
Rebuild</li>
<li><a
href="9dd4cfed96"><code>9dd4cfe</code></a>
Bump the npm-minor group across 1 directory with 6 updates</li>
<li>Additional commits viewable in <a
href="https://github.com/github/codeql-action/compare/v4.35.1...v4.35.2">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github/codeql-action&package-manager=github_actions&previous-version=4.35.1&new-version=4.35.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-07 16:27:05 +03:00
dependabot[bot]
9ab629b7b5 build(deps): bump marked from 18.0.0 to 18.0.2 in /app/vmui/packages/vmui (#10904)
Bumps [marked](https://github.com/markedjs/marked) from 18.0.0 to
18.0.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/markedjs/marked/releases">marked's
releases</a>.</em></p>
<blockquote>
<h2>v18.0.2</h2>
<h2><a
href="https://github.com/markedjs/marked/compare/v18.0.1...v18.0.2">18.0.2</a>
(2026-04-18)</h2>
<h3>Bug Fixes</h3>
<ul>
<li>fix infinite loop for indented code blank line (<a
href="https://redirect.github.com/markedjs/marked/issues/3947">#3947</a>)
(<a
href="58a52e8a49">58a52e8</a>)</li>
</ul>
<h2>v18.0.1</h2>
<h2><a
href="https://github.com/markedjs/marked/compare/v18.0.0...v18.0.1">18.0.1</a>
(2026-04-17)</h2>
<h3>Bug Fixes</h3>
<ul>
<li><strong>rules:</strong> ensure lookbehind regex is evaluated
correctly by minifiers (<a
href="https://redirect.github.com/markedjs/marked/issues/3945">#3945</a>)
(<a
href="abd907aab5">abd907a</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="c4f4529d69"><code>c4f4529</code></a>
chore(release): 18.0.2 [skip ci]</li>
<li><a
href="58a52e8a49"><code>58a52e8</code></a>
fix: fix infinite loop for indented code blank line (<a
href="https://redirect.github.com/markedjs/marked/issues/3947">#3947</a>)</li>
<li><a
href="98b38246c0"><code>98b3824</code></a>
chore(release): 18.0.1 [skip ci]</li>
<li><a
href="abd907aab5"><code>abd907a</code></a>
fix(rules): ensure lookbehind regex is evaluated correctly by minifiers
(<a
href="https://redirect.github.com/markedjs/marked/issues/3945">#3945</a>)</li>
<li><a
href="96351c4a22"><code>96351c4</code></a>
chore(deps-dev): bump marked-highlight from 2.2.3 to 2.2.4 (<a
href="https://redirect.github.com/markedjs/marked/issues/3946">#3946</a>)</li>
<li><a
href="c1326994ed"><code>c132699</code></a>
chore: update testutils (<a
href="https://redirect.github.com/markedjs/marked/issues/3942">#3942</a>)</li>
<li>See full diff in <a
href="https://github.com/markedjs/marked/compare/v18.0.0...v18.0.2">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=marked&package-manager=npm_and_yarn&previous-version=18.0.0&new-version=18.0.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/VictoriaMetrics/VictoriaMetrics/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-07 16:27:04 +03:00
JAYICE
20b9c5c220 lib/backup: explicitly use MD5 checksum header in S3 DeleteObjects requests (#1038)
The change improves compatibility with 3rd party S3 implementations. MD5 had been a default checksum method for a long time, but in v1.73.0 it was changed to CRC by AWS. Some implementations do not support CRC, such as Dell ECS.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10907
PR https://github.com/VictoriaMetrics/VictoriaMetrics-enterprise/pull/1038

---------

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-07 14:51:46 +03:00
Roman Khavronenko
b952f3c3de apptest/vmagent: add helper for creating vmagent instance with low flush interval (#10925)
This change introduces a helper `MustStartDefaultRWVmagent` that by
default sets `-remoteWrite.flushInterval=50ms`. This helper makes it
easier to set up RW tests as all of them rely on frequent flushes. So
instead of overloading the flag, we can use dedicated helper for that.

This helper was added after newly added RW test became flaky because it
didn't have `-remoteWrite.flushInterval=50ms` set.

---------

Failing test
https://github.com/VictoriaMetrics/VictoriaMetrics/actions/runs/25446725004/job/74769752869#step:5:71

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-05-07 14:19:32 +03:00
andriibeee
a608e3395d app/vmauth: honor -maxRequestBodySizeToRetry independently of -requestBufferSize (#10882)
This PR makes vmauth honor `-maxRequestBodySizeToRetry` regardless of `-requestBufferSize`. Previously the larger of the two was used, so the retry could not be disabled by setting `-maxRequestBodySizeToRetry=0` alone; `-requestBufferSize` had to be set to zero too.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10857
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10882

---------

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-05-07 13:43:14 +03:00
hagen1778
d970aa6224 docs/articles: add "Creating Kubernetes debugging AI Agent for VictoriaMetrics"
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit ee8bb76808)
2026-05-06 20:44:16 +02:00
hagen1778
e364b788ac docs/articles: merge article and video links into one option
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 0554c35d45)
2026-05-06 20:44:16 +02:00
hagen1778
4acbd176c6 docs/articles: update link that was moved from datanami
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit dd72d3492d)
2026-05-06 20:44:16 +02:00
hagen1778
460930d749 docs/articles: drop dead link
Original link can't be found anywhere else, so dropping it.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit f0a147fdf7)
2026-05-06 20:44:16 +02:00
Nikolay
354abbeaf8 apptests: add opentelemetry protocol integration tests 2026-05-06 18:06:46 +02:00
Roman Khavronenko
e0793a4e9c lib/httpserver: support multitenancy via headers
This commit adds possibility to omit tenantID in the URL path. In this case,
tenantID will be fetched from HTTP headers `AccountID` and `ProjectID`.
If headers are missing too, then default `0:0` tenantID is used.

This functionality can be enabled only if -enableMultitenantHandlers
cmd-line flag was set to vminsert, vmselect or vmagent.

Motivation: this change makes VM configuration for multitenancy
consistent with VL configuration - see
https://docs.victoriametrics.com/victorialogs/#multitenancy. It also keeps
backward compatibility at the same time.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4241
2026-05-06 17:42:38 +02:00
Roman Khavronenko
eb0fd177c5 docs/vmalert: print templates content in a raw format (#10912)
Before, some of the template examples were wrongly rendered by hugo.
For example:
```
http://vm-grafana.com/<dashboard-id>?viewPanel=<panel-id>&from={{($activeAt.Add (parseDurationTime \"-1h\")).UnixMilli}}&to={{($activeAt.Add (parseDurationTime \"1h\")).UnixMilli}}
```
was rendered like:
```
http://vm-grafana.com/ ?viewPanel=&from={{($activeAt.Add (parseDurationTime "-1h")).UnixMilli}}&to={{($activeAt.Add (parseDurationTime "1h")).UnixMilli}}
```

Wrapping examples in ` helps to render them raw.
While there, also fixed some examples.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 8fa785bb64)
2026-05-06 17:02:22 +02:00
hagen1778
14dee1db14 docs: rm duplicated article
https://medium.com/airbnb-engineering/building-a-high-volume-metrics-pipeline-with-opentelemetry-and-vmagent-c714d6910b45 was already mentioned before
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 6bddb233f7)
2026-05-06 17:02:22 +02:00
hagen1778
0f2bc753f5 docs: add link to https://docs.victoriametrics.com/guides/
Mention https://docs.victoriametrics.com/guides/ in the Articles/guides.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 4bb874df1c)
2026-05-06 17:02:22 +02:00
Julius Rickert
5f2945a052 lib/promscrape/discovery/hetzner: update hetzner_sd_configs for Hetzner Cloud datacenter → location API change
On 2025-12-16, Hetzner Cloud deprecated the `datacenter` field in their
Servers API and introduced a top-level `location` field carrying the
same data. The `datacenter` field will be removed after 2026-07-01.
Without this change, `__meta_hetzner_hcloud_datacenter_location`, and
`__meta_hetzner_hcloud_datacenter_location_network_zone` would silently
become empty for the `hcloud` role after that date.

This mirrors the change made in Prometheus v3.11.0
([prometheus/prometheus#17850](https://github.com/prometheus/prometheus/pull/17850)).

## Changes

**`hcloud` role:**
- Add `HCloudLocation` struct and `Location` field on `HCloudServer`,
mapped to the new top-level `location` API field
- Emit two new canonical labels: `__meta_hetzner_hcloud_location` and
`__meta_hetzner_hcloud_location_network_zone`
- Keep the deprecated `__meta_hetzner_hcloud_datacenter_location` and
`__meta_hetzner_hcloud_datacenter_location_network_zone` labels, now
sourced from the new `location` field so they continue to work past
2026-07-01
- `__meta_hetzner_datacenter` (the datacenter name, e.g. `fsn1-dc14`) is
unaffected for this role — the datacenter name is a distinct concept
from location and is kept as-is (this will stop working starting
2026-07-01)

**`robot` role:**
- Add `__meta_hetzner_robot_datacenter` as the canonical replacement for
`__meta_hetzner_datacenter`; the old label is kept for backward
compatibility

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10909
2026-05-05 18:02:03 +02:00
Max Kotliar
46f50a3240 docs/changelog: add update note about bug in vminsert 2026-04-30 21:08:32 +03:00
Max Kotliar
1f5fc8e3e2 docs: forward port LTS v1.136.8 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-30 20:50:50 +03:00
f41gh7
87ac3a8589 docs/changelog: mention vminsert enterprise bugfix
A bug was introduced in v1.142.0 when changes from the OSS version were
 back-ported into the Enterprise branch. It changed the order of storage
 nodes discovery and resulted in:
 * overwrite of discovered storage nodes
 * duplicate of per storage node metrics

  This bug only affects enterprise vminsert version.
2026-04-30 17:16:56 +02:00
Roman Khavronenko
c9ae8f0a74 docs/vmalert: mention -rule.stripFilePath in #security (#10902)
Mention -rule.stripFilePath cmd-line flag in security recommendations,
so users can be aware of it.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Haley Wang <haley@victoriametrics.com>
(cherry picked from commit 6100b8ba10)
2026-04-29 20:02:28 +02:00
Roman Khavronenko
7650073ec9 docs: mention AI observability (#10903)
The change adds `AI observability` section to `AI tools` documentation.
It mentions excellent @Amper articles describing these integrations in
all details.

The doc change doesn't repeat the articles, but rather helps users to
discover them.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 403d32f57f)
2026-04-29 20:02:27 +02:00
Mathias Palmersheim
908a47d11e docs/vmalert: clarified urls for tenant option (#10898)
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10897 by clarifying what URLS should be used for `-datasource.url`, `-remoteRead.url`, and `-remoteWrite.url` when `-clusterMode` is specified.


PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10898

---------

Co-authored-by: Haley Wang <haley@victoriametrics.com>
2026-04-29 12:18:26 +03:00
Hui Wang
7439822360 docs: polish stream aggregation doc (#10896)
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10896

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-29 12:12:49 +03:00
Max Kotliar
ac82853f48 docs: update release guidance doc (#10887)
Leave only generic details about the release process in public docs.

To maintainers: 
All internal details are described in
https://github.com/VictoriaMetrics/release/blob/main/README.md. The new
document contains up-to-date release process guidance. Please refer to
it instead while preparing a new release.

An archived version of this document is available at:
https://github.com/VictoriaMetrics/release/blob/main/legacy_docs/Release-Guide.md.
2026-04-29 12:05:31 +03:00
Hui Wang
f57534089b app/vmalert: add -rule.stripFilePath flag
The flag already exists in the ENT version. We decided to expose it in
OSS and strip the path from all public places, including all
APIs(includes `/metrics`) and debug logs(it's minor info there).

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5625
2026-04-29 10:14:16 +02:00
andriibeee
8b9bfc9e29 app/vmalert: add formatTime template function
This commit adds the `formatTime` template function to vmalert, which accepts a format string and the current timestamp.

{{ now | formatTime "2006-01-02T15:04:05Z07:00" }}


Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10624
2026-04-29 10:14:16 +02:00
Nikolay
efbf62e4b0 lib/httpserver: suppress TCP health check for tls connections
Previously, if `-tls` flag was provided, victoria metrics components
produced the following log error entry at health checks:

 http: TLS handshake error from 10.244.0.1:46556: EOF

Such health checks are common for many orchestration systems, such as
consul
or kubernetes. And default http server already suppresses such EOF
health checks.

 This commit adds suppression to the tls server as well.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10538
2026-04-29 10:14:15 +02:00
Max Kotliar
194bebcb06 docs: add links to telegram channels (#10894)
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10894
2026-04-28 19:23:30 +03:00
Pablo (Tomas) Fernandez
9c95db254f docs: update guide "Collecting OpenShift logs with Victoria Logs" (#10864)
# What Changed

- Updated the operator installation procedure
- Updated the commands to match the rest of the guides
- Updated screenshots
- Reordered steps to make more sense of the process
- Fixed issues in the YAML
- Tested on actual OpenShift trial instance running on AWS
- Added steps to confirm log ingestion using VMUI

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10864
2026-04-28 16:59:47 +03:00
Pablo (Tomas) Fernandez
aa2ff7822b docs: fix links in docs; refine security page (#10874)
This PR fixes several broken links and anchors in the victoriametrics
docs.

Note about links changes in FAQ.md file. The links inside the paragraph
break navigation in the right-side menu. To fix this, an explicit anchor
definition has been added. The anchor is the same as before, setting it
explicitly fixes the sidebar links.

See https://github.com/VictoriaMetrics/vmdocs/issues/221 for the
up-to-date list once this PR is merged.

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10874
2026-04-28 16:58:27 +03:00
Max Kotliar
3b40c7c616 app/vmalert: fix typo in comment 2026-04-28 16:38:34 +03:00
Max Kotliar
354ce2e570 docs: Replace waiting_for_release with completed label in CONTRIBUTING.md 2026-04-28 16:37:30 +03:00
Max Kotliar
5520bfa845 docs/changelog: cleanup CHANGELOG_2025.md 2026-04-28 16:32:01 +03:00
Max Kotliar
1f1051c9cf docs: bump version to v1.142.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-28 14:06:00 +03:00
Max Kotliar
0f19991cd4 deployment/docker: bump version to v1.142.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-28 14:05:04 +03:00
Max Kotliar
5bf94f1737 docs: forward port LTS v1.136.7 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-28 14:02:22 +03:00
Max Kotliar
2460cf4518 docs/changelog: cut release v1.142.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-28 12:55:56 +03:00
Nikolay
1f1ed8ecd2 lib/opentelemetry: properly reset metric metadata
Previously, metricMetadata was not properly reset during parsing of
metrics. It could result into `Unit` suffix to be added from previously
parsed metric into next metric without Unit field.

  For example, metric `http_request` with `Unit` `seconds` will be
converted into `http_request_seconds` and `Unit` field hold `seconds`.
Next parsed metric `cpu_usage_ratio` has no `Unit` and it will get
previous `seconds` `Unit` -> `cpu_usage_ratio_seconds`.

 This commit adds metricMetadata reset call before parsing of next
 metric.

 Bug was introduced at 293d80910c

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10889
2026-04-28 11:18:01 +02:00
Hui Wang
714454a8b3 dashboards: polish vmauth dashboard (#10884)
See updated dashboard in
https://play-grafana.victoriametrics.com/d/nbuo5Mr4k/victoriametrics-vmauth?orgId=1&from=now-3h&to=now&timezone=browser&var-ds=P4169E866C3094E38&var-job=vmclusterlb-benchmark-vm-cluster-lts&var-instance=$__all&var-user=$__all&var-adhoc=&refresh=30s.

`Stats`:
1. `Users count`: set default value 0;
2. `Uptime`: count vmauth instances per job instead of showing instance
uptime, to be consistent with other dashboards. The actual uptime is not
very useful and is hard to read.

`Overview`:
1. Reorder panels;
2. `Requests rejected rate`: add a `>0` threshold in query.

`Troubleshooting`:
1. Remove unused `Restarts` panel;
2. `Logging rate`: add a `>0` threshold in query;
3. Add `Requests backend error rate` to show underlying backend errors
in addition to request errors.

I don’t see a specific change that needs to be mentioned in the
changelog.
2026-04-27 20:21:14 +03:00
Roman Khavronenko
9bca09b7ad docs/playgrounds: mention iximiuz playgrounds (#10878)
Iximiuz labs prepared a set of playgrounds for VictoriaMetrics. These
are interactive playgrounds backed by real Linux machines running
VictoriaMetrics software, allowing experimenting and investigating right
in the browser tab.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-04-27 19:58:23 +03:00
Zakhar Bessarab
ff907f5c47 docs/playgrounds: add links to SSO playground (#10877)
Added info about Grafana SSO playground to playgrounds docs.

---------

Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 19:43:37 +03:00
Max Kotliar
cab551fd35 docs: update flags with actual v1.141.0 binaries
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 14:38:28 +03:00
Max Kotliar
debd74d566 docs: bump version to v1.141.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 14:36:09 +03:00
Max Kotliar
7f2bfb1b29 deployment/docker: bump version to v1.141.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 14:29:55 +03:00
Max Kotliar
b23215e0b9 docs: forward port LTS v1.122.21 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 13:57:55 +03:00
Max Kotliar
5bc184d29c docs: forward port LTS v1.136.6 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-27 13:57:08 +03:00
Max Kotliar
7e6f70af2d docs/changelog: fix upgrade alpine version
follow-up for
49a8dd4da6
2026-04-24 21:38:15 +03:00
Max Kotliar
c77b5725e2 docs/changelog: cut release v1.141.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-24 19:41:31 +03:00
Max Kotliar
fbbf04dd2c app/vmselect: run make vmui-update
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-24 19:37:59 +03:00
Max Kotliar
08398b1a6a deployment/docker: update base Alpine Docker image from 3.23.2 to 3.23.3
See
https://www.alpinelinux.org/posts/Alpine-3.20.10-3.21.7-3.22.4-3.23.4-released.html
2026-04-24 18:22:23 +03:00
f41gh7
21fb0ab54e deployment/docker: bump version to v1.140.0 2026-04-24 16:15:02 +03:00
Max Kotliar
77bcf2178d docs/changelog: chore 2026-04-24 16:00:05 +03:00
Max Kotliar
e56b2b66c2 Makefile: apptest should compile vmagent-race for recently added test
Follow-up for
1ca4b3ba3c

The commit added a new TestClusterVMAgentForwardMetricsMetadata, so
apptest now requires vmagent-race to be compiled.
2026-04-24 14:58:40 +03:00
Nikolay
e6d764e7f6 app/vmagent: properly attach tenant information to metadata (#10865)
Previously, vmagent ignored tenant ID information obtained from
`__tenant_id__` label for metrics metadata. It made it impossible to route
metrics metadata to the `/multitenant` endpoints. This commit adds tenant ID to the metrics metadata.

It also fixes VMagent multitenant ingestion endpoints. Previously, the tenant info defined there was not properly set to metadata. 

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10865

---------

Signed-off-by: Nikolay <nik@victoriametrics.com>
Signed-off-by: f41gh7 <nik@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-24 14:36:56 +03:00
Hui Wang
f2a9db4e0d dashboards: add metadata ingestion row rate queries to vmagent&vmcluster dashboards (#10868)
Metadata is enabled by default since v1.137.0, and the metadata volume
can be a big contributor to resource usage and network traffic.

vmagent dashboard:
1. `Troubleshooting` section: rename `Datapoints rate` panel to `Rows
rate` to include metadata rate;
2. `Ingestion` section: add metadata rate to existing `Rows rate` panel.
(The difference between this panel and the one above is that this panel
only contains data from write requests, while the above panel also
includes the scraping part.)


vmcluster dashboard:
1. `vminsert` section: add `Rows rate` panel

Didn’t see a good place for it in the vmsingle dashboard, since it
doesn’t have a dedicated insert section, and I don’t want to add it to
`overview` yet.

https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10868
2026-04-24 14:07:49 +03:00
Yury Moladau
23ba12012a app/vmui: improve series color visibility (#10872)
### Describe Your Changes

Improve generated series colors to increase visibility and consistency
across light and dark themes.

Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10869
PR: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10872

| Before | After |
|---|---|
| <img width="758" height="469" alt="image"
src="https://github.com/user-attachments/assets/dfe879fc-c1ff-4128-923b-24dd0b829421"
/> | <img width="758" height="469" alt="image"
src="https://github.com/user-attachments/assets/7ea6f618-2d6d-43b6-b881-9525a2897ef6"
/> |
| <img width="758" height="469" alt="image"
src="https://github.com/user-attachments/assets/ab07e223-5ab5-43dc-8c3f-7ab28d4ab2b6"
/> | <img width="758" height="469" alt="image"
src="https://github.com/user-attachments/assets/988d19b6-ca16-4ca6-af8a-e043cfb066d3"
/> |

---------

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
2026-04-24 13:37:51 +03:00
hagen1778
0169411056 docs: mention accuracy issues for histogram aggregation
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit ca8d9d21a9)
2026-04-24 10:31:54 +02:00
hagen1778
bd53af3d31 docs: mention histogram aggregation link
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 0653b7c7b8)
2026-04-24 10:31:54 +02:00
Roman Khavronenko
8907caf176 docs: update stream aggregation docs (#10871)
* add visual mermaid diagram to demonstrate aggregation concept;
* update Recording-rules-alternative:
* * recommend using rate_sum instead of total for better reliability
* * demonstrate how to calculate sliding window, typical for recording
rules

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Pablo Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
(cherry picked from commit 569197d038)
2026-04-24 10:31:54 +02:00
Max Kotliar
a4612edf56 docs/changelog: chore update notes
force every update note to be on a new line
2026-04-23 20:36:56 +03:00
Artem Fetishev
e26de23739 lib/storage: support samples with future timestamps (#10718)
Add the support of storage and retrieval of samples with future
timestamps as requested in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/827

What to expect:

- By default, the max future timestamp is still limited to `now+2d`. To
change it, set the `-futureRetention` flag in `vmstorage`. The max flag
value is currently limited to `100y`. It can be extended if we see a
demand for this, but it can't be more than `~ 290y` due to how the time
duration is implemented in Go. The flag value can't be less than `2d`.
- downsampling and retention filters (available in enterprise edition)
are currently not supported for future timestamps
- If `vmstorage` restarts with a smaller value of `-futureRetention`
flag, any future partitions that are outside the new future retention
will be automatically deleted.
- Data ingestion, data retrieval, backup/restore, timeseries (soft)
deletion, and other operations work with future timestamps the same way
as with the historical timestamps.
- In the cluster version, the affected binaries are `vmstorage` and
`vmselect`. This means that `vmselect` version must match `vmstorage`
version if you want to query future timestamps. `vminsert` was not
affected, so its version can be a lower one.
- If you downgrade the `vmstorage`, the data with future timestamps will
remain on disk and memory (per-partition caches) but won't be available
for querying.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
Signed-off-by: Artem Fetishev <149964189+rtm0@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-04-23 18:31:07 +02:00
Artem Fetishev
dcb314ab38 lib/timeutil: ensure parsed time is in allowed range (#10870)
Update `timeutil.ParseTimeAt` to check the time limits for all date/time formats, not just year.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-23 17:39:39 +02:00
Max Kotliar
e8efce8f24 docs: forward port LTS v1.122.20 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-23 17:38:06 +03:00
Max Kotliar
fa2074374d docs: forward port LTS v1.136.5 changelog to upstream
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-23 17:32:34 +03:00
Nikolay
6db534f830 app/vmauth: properly start backend health checks
Previously, backend url health check start could produce a data race
and a race condition.

 The following panic could be produced:
`panic: sync: WaitGroup is reused before previous Wait has returned`

 It happened because concurrent goroutine could process request, while
 configuration was reloaded and stopHealthChecks method was called.

 This commit adds a dedicated structure for backend health checks,
which protects from data races with a mutex guard and prevents the race
condition with a boolean flag.

Fixes: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10806
2026-04-23 11:10:18 +02:00
Roman Khavronenko
607141a93a github: update PR template
Visually outline that guideline message should be removed from
description before submitting the PR. This should prevent cases when PR
template was blending into the PRs description remaining unnoticed.
2026-04-23 11:10:18 +02:00
Max Kotliar
ebfd2fef00 go.mod: update metricsql to version that fixes bug in binary op evaluation ordering
The commit in metricsql
d0bc93816e
introduced a bug that changes an order of binary op evaluation. This
commit updates to metricsql version that fixes a bug by reverting to
previous behavior.

The bug was introduced in v1.140.0, v1.136.4, and v1.122.19 releases.

It was reported in
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856
2026-04-22 20:40:25 +03:00
cubic-dev-ai[bot]
8c5104c627 app/vmctl: return errors instead of silently skipping unexpected OpenTSDB responses
Previously 
- `GetData` in the OpenTSDB client was returning empty `Metric{}` with
`nil` error for several conditions (multiple series returned, aggregate
tags present, `modifyData` failures), causing `vmctl opentsdb` to
silently drop series during migration

 This commit changes these silent return paths to return proper errors with
descriptive messages including the query string, so operators can detect
and diagnose partial migrations.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10797
2026-04-22 11:30:06 +02:00
andriibeee
4a24feebb4 lib/cgroup: support reading cpu/memory limits from systemd slices
cgroup v2 version supports slices (aka path hierarchy) for resource limits. It's mostly supported by systemd
and container runtimes built on top of it.

 This commit reads the subpath for systemd slices and traverses it, reading the minimal limit value.

Related docs:
https://docs.oracle.com/en/operating-systems/oracle-linux/9/systemd/SystemdMngCgroupsV2.html#SlicesServicesScopesHierarchy
https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10635
2026-04-22 11:30:06 +02:00
Max Kotliar
b111fc29c6 docs/vmauth: add example for using TLS on public addr but keeping internal non-TLS (#10858)
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10793
2026-04-22 11:30:06 +02:00
Hui Wang
bec5e8b02f app/vmalert: properly remove empty labels value
Previously, if rule label value was set to empty string, vmalert ignored this label during labels merge with labels from data source response. In contrast, Prometheus removes data source label in this case as well. Which allows to perform label delete operation.

 This commit uses the same logic as Prometheus for resolving labels conflicts and allows to remove labels.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766
2026-04-22 10:01:42 +02:00
Max Kotliar
fbc83032cd docs/changelog: add update note about bug in metricsql
Follow up to
7029283f7d
for LTS releases
2026-04-21 20:20:09 +03:00
Max Kotliar
636fbb40e8 docs/changelog: add update note about bug in metricsql
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856

Bug introduced in https://github.com/VictoriaMetrics/metricsql/pull/63
via commit
08dd38d4a0
2026-04-21 20:16:01 +03:00
Fred Navruzov
b7c73117c3 docs/vmanomaly: update visual assets and formulations (#10859)
Update vmanomaly visual assets and improve clarification on allowed
datasources
2026-04-21 19:58:36 +03:00
Roman Khavronenko
01f56dbdf7 apptest: restore helper for default tenant
Helper `getTenant` was removed in
e0e01e46f0 assuming that new change
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10782 will
tolerate missing tenantID in the path.

While that change is still not merged - restoring the helper for tests
to remain functional.
2026-04-21 11:00:56 +02:00
Zhu Jiekun
4d7e87d098 cluster test: fix flaky 386 test of getMaxBufSizePerStorageNode, add notes
- follow-up of
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10846 to fix
test for `GOARCH=386`.
- added comment for `getMaxBufSizePerStorageNode` about 2x buffer of
netstorage.
2026-04-21 11:00:21 +02:00
Alexander Frolov
ade86ca4d0 app/vminsert: account storageNodesBucket count in per-node buffer size
Follow-up for ceda0407fb, which introduced a regression that could
double vminsert memory usage.

 This commit takes in account a second buffer per storageNode.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10725#issuecomment-4282256709
2026-04-20 21:26:59 +02:00
dependabot[bot]
71f4f26582 build(deps): bump github/codeql-action from 4 to 4.35.1 (#10844)
Bumps [github/codeql-action](https://github.com/github/codeql-action)
from 4 to 4.35.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/github/codeql-action/releases">github/codeql-action's
releases</a>.</em></p>
<blockquote>
<h2>v4.35.1</h2>
<ul>
<li>Fix incorrect minimum required Git version for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a>: it should have been 2.36.0, not 2.11.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3781">#3781</a></li>
</ul>
<h2>v4.35.0</h2>
<ul>
<li>Reduced the minimum Git version required for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> from 2.38.0 to 2.11.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3767">#3767</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.1">2.25.1</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3773">#3773</a></li>
</ul>
<h2>v4.34.1</h2>
<ul>
<li>Downgrade default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.3">2.24.3</a>
due to issues with a small percentage of Actions and JavaScript
analyses. <a
href="https://redirect.github.com/github/codeql-action/pull/3762">#3762</a></li>
</ul>
<h2>v4.34.0</h2>
<ul>
<li>Added an experimental change which disables TRAP caching when <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> is enabled, since improved incremental analysis
supersedes TRAP caching. This will improve performance and reduce
Actions cache usage. We expect to roll this change out to everyone in
March. <a
href="https://redirect.github.com/github/codeql-action/pull/3569">#3569</a></li>
<li>We are rolling out improved incremental analysis to C/C++ analyses
that use build mode <code>none</code>. We expect this rollout to be
complete by the end of April 2026. <a
href="https://redirect.github.com/github/codeql-action/pull/3584">#3584</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.0">2.25.0</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3585">#3585</a></li>
</ul>
<h2>v4.33.0</h2>
<ul>
<li>
<p>Upcoming change: Starting April 2026, the CodeQL Action will skip
collecting file coverage information on pull requests to improve
analysis performance. File coverage information will still be computed
on non-PR analyses. Pull request analyses will log a warning about this
upcoming change. <a
href="https://redirect.github.com/github/codeql-action/pull/3562">#3562</a></p>
<p>To opt out of this change:</p>
<ul>
<li><strong>Repositories owned by an organization:</strong> Create a
custom repository property with the name
<code>github-codeql-file-coverage-on-prs</code> and the type
&quot;True/false&quot;, then set this property to <code>true</code> in
the repository's settings. For more information, see <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">Managing
custom properties for repositories in your organization</a>.
Alternatively, if you are using an advanced setup workflow, you can set
the <code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable
to <code>true</code> in your workflow.</li>
<li><strong>User-owned repositories using default setup:</strong> Switch
to an advanced setup workflow and set the
<code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable to
<code>true</code> in your workflow.</li>
<li><strong>User-owned repositories using advanced setup:</strong> Set
the <code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable
to <code>true</code> in your workflow.</li>
</ul>
</li>
<li>
<p>Fixed <a
href="https://redirect.github.com/github/codeql-action/issues/3555">a
bug</a> which caused the CodeQL Action to fail loading repository
properties if a &quot;Multi select&quot; repository property was
configured for the repository. <a
href="https://redirect.github.com/github/codeql-action/pull/3557">#3557</a></p>
</li>
<li>
<p>The CodeQL Action now loads <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">custom
repository properties</a> on GitHub Enterprise Server, enabling the
customization of features such as
<code>github-codeql-disable-overlay</code> that was previously only
available on GitHub.com. <a
href="https://redirect.github.com/github/codeql-action/pull/3559">#3559</a></p>
</li>
<li>
<p>Once <a
href="https://docs.github.com/en/code-security/how-tos/secure-at-scale/configure-organization-security/manage-usage-and-access/giving-org-access-private-registries">private
package registries</a> can be configured with OIDC-based authentication
for organizations, the CodeQL Action will now be able to accept such
configurations. <a
href="https://redirect.github.com/github/codeql-action/pull/3563">#3563</a></p>
</li>
<li>
<p>Fixed the retry mechanism for database uploads. Previously this would
fail with the error &quot;Response body object should not be disturbed
or locked&quot;. <a
href="https://redirect.github.com/github/codeql-action/pull/3564">#3564</a></p>
</li>
<li>
<p>A warning is now emitted if the CodeQL Action detects a repository
property whose name suggests that it relates to the CodeQL Action, but
which is not one of the properties recognised by the current version of
the CodeQL Action. <a
href="https://redirect.github.com/github/codeql-action/pull/3570">#3570</a></p>
</li>
</ul>
<h2>v4.32.6</h2>
<ul>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.3">2.24.3</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3548">#3548</a></li>
</ul>
<h2>v4.32.5</h2>
<ul>
<li>Repositories owned by an organization can now set up the
<code>github-codeql-disable-overlay</code> custom repository property to
disable <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis for CodeQL</a>. First, create a custom repository
property with the name <code>github-codeql-disable-overlay</code> and
the type &quot;True/false&quot; in the organization's settings. Then in
the repository's settings, set this property to <code>true</code> to
disable improved incremental analysis. For more information, see <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">Managing
custom properties for repositories in your organization</a>. This
feature is not yet available on GitHub Enterprise Server. <a
href="https://redirect.github.com/github/codeql-action/pull/3507">#3507</a></li>
<li>Added an experimental change so that when <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> fails on a runner — potentially due to
insufficient disk space — the failure is recorded in the Actions cache
so that subsequent runs will automatically skip improved incremental
analysis until something changes (e.g. a larger runner is provisioned or
a new CodeQL version is released). We expect to roll this change out to
everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3487">#3487</a></li>
<li>The minimum memory check for improved incremental analysis is now
skipped for CodeQL 2.24.3 and later, which has reduced peak RAM usage.
<a
href="https://redirect.github.com/github/codeql-action/pull/3515">#3515</a></li>
<li>Reduced log levels for best-effort private package registry
connection check failures to reduce noise from workflow annotations. <a
href="https://redirect.github.com/github/codeql-action/pull/3516">#3516</a></li>
<li>Added an experimental change which lowers the minimum disk space
requirement for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a>, enabling it to run on standard GitHub Actions
runners. We expect to roll this change out to everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3498">#3498</a></li>
<li>Added an experimental change which allows the
<code>start-proxy</code> action to resolve the CodeQL CLI version from
feature flags instead of using the linked CLI bundle version. We expect
to roll this change out to everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3512">#3512</a></li>
<li>The previously experimental changes from versions 4.32.3, 4.32.4,
3.32.3 and 3.32.4 are now enabled by default. <a
href="https://redirect.github.com/github/codeql-action/pull/3503">#3503</a>,
<a
href="https://redirect.github.com/github/codeql-action/pull/3504">#3504</a></li>
</ul>
<h2>v4.32.4</h2>
<ul>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.2">2.24.2</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3493">#3493</a></li>
<li>Added an experimental change which improves how certificates are
generated for the authentication proxy that is used by the CodeQL Action
in Default Setup when <a
href="https://docs.github.com/en/code-security/how-tos/secure-at-scale/configure-organization-security/manage-usage-and-access/giving-org-access-private-registries">private
package registries are configured</a>. This is expected to generate more
widely compatible certificates and should have no impact on analyses
which are working correctly already. We expect to roll this change out
to everyone in February. <a
href="https://redirect.github.com/github/codeql-action/pull/3473">#3473</a></li>
<li>When the CodeQL Action is run <a
href="https://docs.github.com/en/code-security/how-tos/scan-code-for-vulnerabilities/troubleshooting/troubleshooting-analysis-errors/logs-not-detailed-enough#creating-codeql-debugging-artifacts-for-codeql-default-setup">with
debugging enabled in Default Setup</a> and <a
href="https://docs.github.com/en/code-security/how-tos/secure-at-scale/configure-organization-security/manage-usage-and-access/giving-org-access-private-registries">private
package registries are configured</a>, the &quot;Setup proxy for
registries&quot; step will output additional diagnostic information that
can be used for troubleshooting. <a
href="https://redirect.github.com/github/codeql-action/pull/3486">#3486</a></li>
<li>Added a setting which allows the CodeQL Action to enable network
debugging for Java programs. This will help GitHub staff support
customers with troubleshooting issues in GitHub-managed CodeQL
workflows, such as Default Setup. This setting can only be enabled by
GitHub staff. <a
href="https://redirect.github.com/github/codeql-action/pull/3485">#3485</a></li>
<li>Added a setting which enables GitHub-managed workflows, such as
Default Setup, to use a <a
href="https://github.com/dsp-testing/codeql-cli-nightlies">nightly
CodeQL CLI release</a> instead of the latest, stable release that is
used by default. This will help GitHub staff support customers whose
analyses for a given repository or organization require early access to
a change in an upcoming CodeQL CLI release. This setting can only be
enabled by GitHub staff. <a
href="https://redirect.github.com/github/codeql-action/pull/3484">#3484</a></li>
</ul>
<h2>v4.32.3</h2>
<ul>
<li>Added experimental support for testing connections to <a
href="https://docs.github.com/en/code-security/how-tos/secure-at-scale/configure-organization-security/manage-usage-and-access/giving-org-access-private-registries">private
package registries</a>. This feature is not currently enabled for any
analysis. In the future, it may be enabled by default for Default Setup.
<a
href="https://redirect.github.com/github/codeql-action/pull/3466">#3466</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/github/codeql-action/blob/main/CHANGELOG.md">github/codeql-action's
changelog</a>.</em></p>
<blockquote>
<h2>4.35.1 - 27 Mar 2026</h2>
<ul>
<li>Fix incorrect minimum required Git version for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a>: it should have been 2.36.0, not 2.11.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3781">#3781</a></li>
</ul>
<h2>4.35.0 - 27 Mar 2026</h2>
<ul>
<li>Reduced the minimum Git version required for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> from 2.38.0 to 2.11.0. <a
href="https://redirect.github.com/github/codeql-action/pull/3767">#3767</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.1">2.25.1</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3773">#3773</a></li>
</ul>
<h2>4.34.1 - 20 Mar 2026</h2>
<ul>
<li>Downgrade default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.3">2.24.3</a>
due to issues with a small percentage of Actions and JavaScript
analyses. <a
href="https://redirect.github.com/github/codeql-action/pull/3762">#3762</a></li>
</ul>
<h2>4.34.0 - 20 Mar 2026</h2>
<ul>
<li>Added an experimental change which disables TRAP caching when <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> is enabled, since improved incremental analysis
supersedes TRAP caching. This will improve performance and reduce
Actions cache usage. We expect to roll this change out to everyone in
March. <a
href="https://redirect.github.com/github/codeql-action/pull/3569">#3569</a></li>
<li>We are rolling out improved incremental analysis to C/C++ analyses
that use build mode <code>none</code>. We expect this rollout to be
complete by the end of April 2026. <a
href="https://redirect.github.com/github/codeql-action/pull/3584">#3584</a></li>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.25.0">2.25.0</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3585">#3585</a></li>
</ul>
<h2>4.33.0 - 16 Mar 2026</h2>
<ul>
<li>
<p>Upcoming change: Starting April 2026, the CodeQL Action will skip
collecting file coverage information on pull requests to improve
analysis performance. File coverage information will still be computed
on non-PR analyses. Pull request analyses will log a warning about this
upcoming change. <a
href="https://redirect.github.com/github/codeql-action/pull/3562">#3562</a></p>
<p>To opt out of this change:</p>
<ul>
<li><strong>Repositories owned by an organization:</strong> Create a
custom repository property with the name
<code>github-codeql-file-coverage-on-prs</code> and the type
&quot;True/false&quot;, then set this property to <code>true</code> in
the repository's settings. For more information, see <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">Managing
custom properties for repositories in your organization</a>.
Alternatively, if you are using an advanced setup workflow, you can set
the <code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable
to <code>true</code> in your workflow.</li>
<li><strong>User-owned repositories using default setup:</strong> Switch
to an advanced setup workflow and set the
<code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable to
<code>true</code> in your workflow.</li>
<li><strong>User-owned repositories using advanced setup:</strong> Set
the <code>CODEQL_ACTION_FILE_COVERAGE_ON_PRS</code> environment variable
to <code>true</code> in your workflow.</li>
</ul>
</li>
<li>
<p>Fixed <a
href="https://redirect.github.com/github/codeql-action/issues/3555">a
bug</a> which caused the CodeQL Action to fail loading repository
properties if a &quot;Multi select&quot; repository property was
configured for the repository. <a
href="https://redirect.github.com/github/codeql-action/pull/3557">#3557</a></p>
</li>
<li>
<p>The CodeQL Action now loads <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">custom
repository properties</a> on GitHub Enterprise Server, enabling the
customization of features such as
<code>github-codeql-disable-overlay</code> that was previously only
available on GitHub.com. <a
href="https://redirect.github.com/github/codeql-action/pull/3559">#3559</a></p>
</li>
<li>
<p>Once <a
href="https://docs.github.com/en/code-security/how-tos/secure-at-scale/configure-organization-security/manage-usage-and-access/giving-org-access-private-registries">private
package registries</a> can be configured with OIDC-based authentication
for organizations, the CodeQL Action will now be able to accept such
configurations. <a
href="https://redirect.github.com/github/codeql-action/pull/3563">#3563</a></p>
</li>
<li>
<p>Fixed the retry mechanism for database uploads. Previously this would
fail with the error &quot;Response body object should not be disturbed
or locked&quot;. <a
href="https://redirect.github.com/github/codeql-action/pull/3564">#3564</a></p>
</li>
<li>
<p>A warning is now emitted if the CodeQL Action detects a repository
property whose name suggests that it relates to the CodeQL Action, but
which is not one of the properties recognised by the current version of
the CodeQL Action. <a
href="https://redirect.github.com/github/codeql-action/pull/3570">#3570</a></p>
</li>
</ul>
<h2>4.32.6 - 05 Mar 2026</h2>
<ul>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.3">2.24.3</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3548">#3548</a></li>
</ul>
<h2>4.32.5 - 02 Mar 2026</h2>
<ul>
<li>Repositories owned by an organization can now set up the
<code>github-codeql-disable-overlay</code> custom repository property to
disable <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis for CodeQL</a>. First, create a custom repository
property with the name <code>github-codeql-disable-overlay</code> and
the type &quot;True/false&quot; in the organization's settings. Then in
the repository's settings, set this property to <code>true</code> to
disable improved incremental analysis. For more information, see <a
href="https://docs.github.com/en/organizations/managing-organization-settings/managing-custom-properties-for-repositories-in-your-organization">Managing
custom properties for repositories in your organization</a>. This
feature is not yet available on GitHub Enterprise Server. <a
href="https://redirect.github.com/github/codeql-action/pull/3507">#3507</a></li>
<li>Added an experimental change so that when <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a> fails on a runner — potentially due to
insufficient disk space — the failure is recorded in the Actions cache
so that subsequent runs will automatically skip improved incremental
analysis until something changes (e.g. a larger runner is provisioned or
a new CodeQL version is released). We expect to roll this change out to
everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3487">#3487</a></li>
<li>The minimum memory check for improved incremental analysis is now
skipped for CodeQL 2.24.3 and later, which has reduced peak RAM usage.
<a
href="https://redirect.github.com/github/codeql-action/pull/3515">#3515</a></li>
<li>Reduced log levels for best-effort private package registry
connection check failures to reduce noise from workflow annotations. <a
href="https://redirect.github.com/github/codeql-action/pull/3516">#3516</a></li>
<li>Added an experimental change which lowers the minimum disk space
requirement for <a
href="https://redirect.github.com/github/roadmap/issues/1158">improved
incremental analysis</a>, enabling it to run on standard GitHub Actions
runners. We expect to roll this change out to everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3498">#3498</a></li>
<li>Added an experimental change which allows the
<code>start-proxy</code> action to resolve the CodeQL CLI version from
feature flags instead of using the linked CLI bundle version. We expect
to roll this change out to everyone in March. <a
href="https://redirect.github.com/github/codeql-action/pull/3512">#3512</a></li>
<li>The previously experimental changes from versions 4.32.3, 4.32.4,
3.32.3 and 3.32.4 are now enabled by default. <a
href="https://redirect.github.com/github/codeql-action/pull/3503">#3503</a>,
<a
href="https://redirect.github.com/github/codeql-action/pull/3504">#3504</a></li>
</ul>
<h2>4.32.4 - 20 Feb 2026</h2>
<ul>
<li>Update default CodeQL bundle version to <a
href="https://github.com/github/codeql-action/releases/tag/codeql-bundle-v2.24.2">2.24.2</a>.
<a
href="https://redirect.github.com/github/codeql-action/pull/3493">#3493</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="c10b8064de"><code>c10b806</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3782">#3782</a>
from github/update-v4.35.1-d6d1743b8</li>
<li><a
href="c5ffd06837"><code>c5ffd06</code></a>
Update changelog for v4.35.1</li>
<li><a
href="d6d1743b8e"><code>d6d1743</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3781">#3781</a>
from github/henrymercer/update-git-minimum-version</li>
<li><a
href="65d2efa733"><code>65d2efa</code></a>
Add changelog note</li>
<li><a
href="2437b20ab3"><code>2437b20</code></a>
Update minimum git version for overlay to 2.36.0</li>
<li><a
href="ea5f71947c"><code>ea5f719</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3775">#3775</a>
from github/dependabot/npm_and_yarn/node-forge-1.4.0</li>
<li><a
href="45ceeea896"><code>45ceeea</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3777">#3777</a>
from github/mergeback/v4.35.0-to-main-b8bb9f28</li>
<li><a
href="24448c9843"><code>24448c9</code></a>
Rebuild</li>
<li><a
href="7c51060631"><code>7c51060</code></a>
Update changelog and version after v4.35.0</li>
<li><a
href="b8bb9f28b8"><code>b8bb9f2</code></a>
Merge pull request <a
href="https://redirect.github.com/github/codeql-action/issues/3776">#3776</a>
from github/update-v4.35.0-0078ad667</li>
<li>Additional commits viewable in <a
href="https://github.com/github/codeql-action/compare/v4...v4.35.1">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github/codeql-action&package-manager=github_actions&previous-version=4&new-version=4.35.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-20 15:55:32 +03:00
dependabot[bot]
8107b02412 build(deps): bump actions/cache from 4 to 5.0.4 (#10802)
Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5.0.4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/cache/releases">actions/cache's
releases</a>.</em></p>
<blockquote>
<h2>v5.0.4</h2>
<h2>What's Changed</h2>
<ul>
<li>Add release instructions and update maintainer docs by <a
href="https://github.com/Link-"><code>@​Link-</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1696">actions/cache#1696</a></li>
<li>Potential fix for code scanning alert no. 52: Workflow does not
contain permissions by <a
href="https://github.com/Link-"><code>@​Link-</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1697">actions/cache#1697</a></li>
<li>Fix workflow permissions and cleanup workflow names / formatting by
<a href="https://github.com/Link-"><code>@​Link-</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1699">actions/cache#1699</a></li>
<li>docs: Update examples to use the latest version by <a
href="https://github.com/XZTDean"><code>@​XZTDean</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1690">actions/cache#1690</a></li>
<li>Fix proxy integration tests by <a
href="https://github.com/Link-"><code>@​Link-</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1701">actions/cache#1701</a></li>
<li>Fix cache key in examples.md for bun.lock by <a
href="https://github.com/RyPeck"><code>@​RyPeck</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1722">actions/cache#1722</a></li>
<li>Update dependencies &amp; patch security vulnerabilities by <a
href="https://github.com/Link-"><code>@​Link-</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1738">actions/cache#1738</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/XZTDean"><code>@​XZTDean</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/cache/pull/1690">actions/cache#1690</a></li>
<li><a href="https://github.com/RyPeck"><code>@​RyPeck</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/cache/pull/1722">actions/cache#1722</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/cache/compare/v5...v5.0.4">https://github.com/actions/cache/compare/v5...v5.0.4</a></p>
<h2>v5.0.3</h2>
<h2>What's Changed</h2>
<ul>
<li>Bump <code>@actions/cache</code> to v5.0.5 (Resolves: <a
href="https://github.com/actions/cache/security/dependabot/33">https://github.com/actions/cache/security/dependabot/33</a>)</li>
<li>Bump <code>@actions/core</code> to v2.0.3</li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/cache/compare/v5...v5.0.3">https://github.com/actions/cache/compare/v5...v5.0.3</a></p>
<h2>v5.0.2</h2>
<h1>v5.0.2</h1>
<h2>What's Changed</h2>
<p>When creating cache entries, 429s returned from the cache service
will not be retried.</p>
<h2>v5.0.1</h2>
<blockquote>
<p>[!IMPORTANT]
<strong><code>actions/cache@v5</code> runs on the Node.js 24 runtime and
requires a minimum Actions Runner version of
<code>2.327.1</code>.</strong></p>
<p>If you are using self-hosted runners, ensure they are updated before
upgrading.</p>
</blockquote>
<hr />
<h1>v5.0.1</h1>
<h2>What's Changed</h2>
<ul>
<li>fix: update <code>@​actions/cache</code> for Node.js 24 punycode
deprecation by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1685">actions/cache#1685</a></li>
<li>prepare release v5.0.1 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/cache/pull/1686">actions/cache#1686</a></li>
</ul>
<h1>v5.0.0</h1>
<h2>What's Changed</h2>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/actions/cache/blob/main/RELEASES.md">actions/cache's
changelog</a>.</em></p>
<blockquote>
<h1>Releases</h1>
<h2>How to prepare a release</h2>
<blockquote>
<p>[!NOTE]<br />
Relevant for maintainers with write access only.</p>
</blockquote>
<ol>
<li>Switch to a new branch from <code>main</code>.</li>
<li>Run <code>npm test</code> to ensure all tests are passing.</li>
<li>Update the version in <a
href="https://github.com/actions/cache/blob/main/package.json"><code>https://github.com/actions/cache/blob/main/package.json</code></a>.</li>
<li>Run <code>npm run build</code> to update the compiled files.</li>
<li>Update this <a
href="https://github.com/actions/cache/blob/main/RELEASES.md"><code>https://github.com/actions/cache/blob/main/RELEASES.md</code></a>
with the new version and changes in the <code>## Changelog</code>
section.</li>
<li>Run <code>licensed cache</code> to update the license report.</li>
<li>Run <code>licensed status</code> and resolve any warnings by
updating the <a
href="https://github.com/actions/cache/blob/main/.licensed.yml"><code>https://github.com/actions/cache/blob/main/.licensed.yml</code></a>
file with the exceptions.</li>
<li>Commit your changes and push your branch upstream.</li>
<li>Open a pull request against <code>main</code> and get it reviewed
and merged.</li>
<li>Draft a new release at <a
href="https://github.com/actions/cache/releases">https://github.com/actions/cache/releases</a>,
using the same version number used in <code>package.json</code>:
<ol>
<li>Create a new tag with the version number.</li>
<li>Auto generate release notes and update them to match the changes you
made in <code>RELEASES.md</code>.</li>
<li>Toggle the set as the latest release option.</li>
<li>Publish the release.</li>
</ol>
</li>
<li>Navigate to <a
href="https://github.com/actions/cache/actions/workflows/release-new-action-version.yml">https://github.com/actions/cache/actions/workflows/release-new-action-version.yml</a>
<ol>
<li>There should be a workflow run queued with the same version
number.</li>
<li>Approve the run to publish the new version and update the major tags
for this action.</li>
</ol>
</li>
</ol>
<h2>Changelog</h2>
<h3>5.0.4</h3>
<ul>
<li>Bump <code>minimatch</code> to v3.1.5 (fixes ReDoS via globstar
patterns)</li>
<li>Bump <code>undici</code> to v6.24.1 (WebSocket decompression bomb
protection, header validation fixes)</li>
<li>Bump <code>fast-xml-parser</code> to v5.5.6</li>
</ul>
<h3>5.0.3</h3>
<ul>
<li>Bump <code>@actions/cache</code> to v5.0.5 (Resolves: <a
href="https://github.com/actions/cache/security/dependabot/33">https://github.com/actions/cache/security/dependabot/33</a>)</li>
<li>Bump <code>@actions/core</code> to v2.0.3</li>
</ul>
<h3>5.0.2</h3>
<ul>
<li>Bump <code>@actions/cache</code> to v5.0.3 <a
href="https://redirect.github.com/actions/cache/pull/1692">#1692</a></li>
</ul>
<h3>5.0.1</h3>
<ul>
<li>Update <code>@azure/storage-blob</code> to <code>^12.29.1</code> via
<code>@actions/cache@5.0.1</code> <a
href="https://redirect.github.com/actions/cache/pull/1685">#1685</a></li>
</ul>
<h3>5.0.0</h3>
<blockquote>
<p>[!IMPORTANT]
<code>actions/cache@v5</code> runs on the Node.js 24 runtime and
requires a minimum Actions Runner version of <code>2.327.1</code>.</p>
</blockquote>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="27d5ce7f10"><code>27d5ce7</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/cache/issues/1747">#1747</a>
from actions/yacaovsnc/update-dependency</li>
<li><a
href="f280785d7b"><code>f280785</code></a>
licensed changes</li>
<li><a
href="619aeb1606"><code>619aeb1</code></a>
npm run build generated dist files</li>
<li><a
href="bcf16c2893"><code>bcf16c2</code></a>
Update ts-http-runtime to 0.3.5</li>
<li><a
href="668228422a"><code>6682284</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/cache/issues/1738">#1738</a>
from actions/prepare-v5.0.4</li>
<li><a
href="e34039626f"><code>e340396</code></a>
Update RELEASES</li>
<li><a
href="8a67110529"><code>8a67110</code></a>
Add licenses</li>
<li><a
href="1865903e1b"><code>1865903</code></a>
Update dependencies &amp; patch security vulnerabilities</li>
<li><a
href="5656298164"><code>5656298</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/cache/issues/1722">#1722</a>
from RyPeck/patch-1</li>
<li><a
href="4e380d19e1"><code>4e380d1</code></a>
Fix cache key in examples.md for bun.lock</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/cache/compare/v4...v5">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/cache&package-manager=github_actions&previous-version=4&new-version=5.0.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-20 15:55:31 +03:00
andriibeee
b9fc9dc706 lib/netutil: fix IPv6 address corruption in proxy protocol v2 parser
Proxy protocol parser kept sub-slice reference for pooled bytesBuffer at readProxyProto
```
 bb := bbPool.Get()
 defer bbPool.Put(bb)   // ← buffer returned to pool AFTER function returns
...
   IP:   bb.B[0:16],  // ← BUG: sub-slice of pooled buffer!
...
 ```

 This commit properly allocates new slice for ipv6 address and copies buffer content to it.

 Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10839
2026-04-20 12:13:47 +02:00
Andrii Chubatiuk
a6fafa8387 lib/streamaggr: added vm_streamaggr_counter_resets_total counter (#10807)
### Describe Your Changes

Added `vm_streamaggr_counter_resets` metric for `rate*`, `total*`, and
`increase*` outputs, which is useful for unpredictable output behaviour
investigation.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Andrii Chubatiuk <andrew.chubatiuk@gmail.com>
Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit ce227fe7d9)
2026-04-20 11:48:49 +02:00
hagen1778
3ded4e1d50 deployment/alerts: move IndexDBRecordsDrop and TooManyTSIDMisses rules to storage-related files
`IndexDBRecordsDrop` and `TooManyTSIDMisses` were mistakenly placed to `alerts-health.yml`,
which was supposed to contain rules related to all VM components. But these two rules
are related to storage components only (vmstorage and vmsingle). Moving them to corresponding
files.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit e4524eb2fb)
2026-04-20 11:48:49 +02:00
hagen1778
ed7ec57bd2 deployment/alerts: rename alerts.yml to alerts-single-node.yml
The change should reduce confusion for users about where `alerts.yml`
belongs. Before, developers could mistakenly assume that
`alerts.yml` was related to both single and cluster installations.
As a result, rule `MetadataCacheUtilizationIsTooHigh` was added only
to `alerts.yml` and not copied to `alerts-cluster.yml`.

The rename change should bring more context into the file name
and reduce confusion in the future.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit b9ba5dacc3)
2026-04-20 11:48:49 +02:00
hagen1778
5866b2a683 deployment/alerts: add MetadataCacheUtilizationIsTooHigh to cluster rules
Before, this rule was only a part of single-node rule set.
But it is applicable for both: single and cluster installations.
Adding it to cluster as well.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 1a8fe4f2f8)
2026-04-20 11:48:48 +02:00
Roman Khavronenko
3f47f5b0a1 deployment/rules: add MetricNameStatsCacheUtilizationIsTooHigh alert (#10840)
The new rule `MetricNameStatsCacheUtilizationIsTooHigh` will signal
overutilization of the metric names usage stats tracker. See
https://docs.victoriametrics.com/victoriametrics/#track-ingested-metrics-usage

This rule can fire for deployments with high churn rate of metric names.
In cases like this, it is better to disable metric name tracking
completely, as it brings no use.

It might fire for deployments that were tracking metric names for very
long periods and this alert might be a good sign to reset the cache.

### Describe Your Changes

Please provide a brief description of the changes you made. Be as
specific as possible to help others understand the purpose and impact of
your modifications.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 2dcfbd8e19)
2026-04-20 11:48:48 +02:00
Max Kotliar
f9127ba794 docs/changelog: chore wording a bit; add a link 2026-04-17 19:34:48 +03:00
Jan Dittrich
c1e52df6ee docs: align the limit mentioned in the docs with actual flag -maxLabelsPerTimeseries value (#10826)
The docs currently wrongly state that vminsert applies a label limit
per timeseries of `30`. Currently, the limit is `40`, which is also
correctly stated in the vmcluster docs. This PR corrects this in the key
concepts docs.

```
  -maxLabelsPerTimeseries int
     The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason="too_many_labels"} metric at /metrics page is incremented (default 40)
```

https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10826
2026-04-17 19:18:05 +03:00
Phuong Le
4bba813668 docs/contributing: clarify test requirements in pull request checklist (#10781)
Clarify in the pull request checklist that tests are expected for
non-trivial changes and bug fixes must include tests unless a maintainer
explicitly agrees otherwise

https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10781
2026-04-17 18:30:09 +03:00
Phuong Le
c1d0c6bc0e .github: shorten PR template (#10789)
After switching squash merges to use the PR title and description, the
PR template text started leaking into final commit messages and adding
noise.

This PR removes the template and documents what a PR title and PR
description should contain instead.

See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10789
2026-04-17 18:18:37 +03:00
f41gh7
a311d1bf0f docs: changelog add missing PR links
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-04-17 11:23:11 +02:00
f41gh7
5fa144e257 vendor: update metrics and metricsql libs 2026-04-17 11:23:10 +02:00
andriibeee
3e2c562e4b app/vmauth: properly close backend response body
Previously, after RoundTrip returned successfully (err == nil, res != nil), the code checked if the original client request's context was canceled. If canceled, it returned immediately without closing res.Body.

There is a race window where:
1) RoundTrip completes successfully (res is non-nil)
2) The client cancels the request context (closes connection)
3) The context check at line 484 sees the cancellation
4) The function returns without closing res.Body

The response body holds a reference to the underlying TCP connection. Without closing it, the connection is permanently leaked along with the transport goroutines (readLoop + writeLoop or dialConnFor).

The bug was introduced at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10233

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10833
2026-04-17 10:58:03 +02:00
Yury Moladau
a25c1ed7c2 app/vmui: fix Alerting Rules page query link and time display
**"Run query" link params**  
Added correct params to "Run query" link on Alerting Rules page:
- `g0.step_input` - set to `group.interval` (in seconds)
- `g0.end_time` - set to `rule.lastEvaluation` / `alert.activeAt`
- `g0.relative_time=none` - to fix the time range

**Time display timezone**  
Changed `t.format(...)` to `t.tz().format(...)` to display time in the
user-selected timezone.

Related issue:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10366
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10827
2026-04-17 10:58:02 +02:00
andriibeee
c698dc24a6 lib/handshake: ignore TCP healthchecks in VMSelect just like in VMInsert
TCP healthchecks on the clusternative port of vmselect log the following warning continuously:

    VictoriaMetrics/lib/vmselectapi/server.go:204 cannot complete vmselect handshake due to network error with client "10.129.30.27:43829": cannot read hello message : cannot read message with size 11: EOF; read only 0 bytes. Check vmselect logs for errors

This is in contrast to vminsert, where it seems like there's handling for these healthchecks:
```
 if errors.Is(err, io.EOF) { 
 	// This is likely a TCP healthcheck, which must be ignored in order to prevent logs pollution. 
 	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1762 
 	return errTCPHealthcheck 
```

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10786
2026-04-16 23:01:29 +02:00
Nikolay
d463745c2e lib/promscrape: prevent unbounded scrape error body read
Previously, on non-200 HTTP status codes, lib/promscrape performed an
unbounded body read, which could potentially result in OOM.

This commit adds a maxScrapeSize limit to error response body reads,
protecting against malicious or misbehaving metrics endpoints.
2026-04-16 22:51:32 +02:00
cubic-dev-ai[bot]
61827e932b app/vminsert: correctly stop StopIngestionRateLimiter before vminsert.Stop in vmsingle shutdown
vmsingle shuts down vminsert before closing the ingestion rate limiter, even though the rate limiter API explicitly requires the opposite order to unblock callers. vminsert.Stop() waits for unmarshal workers, which can be blocked in ingestionRateLimiter.Register() when the limit is hit.
2026-04-16 22:51:32 +02:00
Yury Moladau
4c0dc940d4 app/vmui: update package dependencies (#10831)
### Describe Your Changes

Update package versions in `app/vmui/packages/vmui/package.json`.

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
2026-04-16 22:51:28 +02:00
cubic-dev-ai[bot]
2363d372a3 fix: prevent deadlock in vmrestore worker pool on context cancellation
Workers in runParallelPerPathInternal check ctxLocal.Done() before processing each work item and exit early on cancellation — without sending a result to resultCh. However, the coordinator loop always waits for exactly len(perPath) results from resultCh. If cancellation occurs before all tasks report, the read blocks indefinitely.
2026-04-16 22:51:27 +02:00
Fred Navruzov
fcdceecd12 docs/vmanomaly-v1.29.3 (#10832)
### Describe Your Changes

Update vmanomaly docs to v1.29.3

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-04-16 17:34:53 +03:00
Roman Khavronenko
e0e01e46f0 apptest: add support for specifying HTTP headers (#10830)
This change allows specifying headers for provided API calls. This
ability is required for proper testing of Tenant-via-Header feature in
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10782

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 443ea9cbc6)
2026-04-16 15:04:54 +02:00
andriibeee
204162c13c lib/awsapi: pre-populate credentials only for static creds without roleARN
0aaa741b5b  introduced a regression in lib/awsapi/config.go that causes empty credentials to be returned on the very first call to getFreshAPICredentials() when using EKS Pod Identity (or any container credential mechanism with no static access key). These empty credentials are then used for SigV4 signing -> 403 Forbidden on every remote write request.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10815
2026-04-16 11:52:56 +02:00
Max Kotliar
d4e539466e docs/changelog: fix feature indention 2026-04-15 17:34:38 +03:00
Aliaksandr Valialkin
947ea7a470 vendor: update github.com/VictoriaMetrics/VictoriaLogs from v1.50.1-0.20260415114444-d5b5febe4954 to github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0
This is needed for vmalert, so it accepts LogsQL queries with 'limit' and 'offset' pipes.

See https://github.com/VictoriaMetrics/VictoriaLogs/issues/1296#issuecomment-4252036978
2026-04-15 14:45:51 +02:00
Aliaksandr Valialkin
d4c9f15834 vendor: run make vendor-update 2026-04-15 14:04:06 +02:00
Aliaksandr Valialkin
559079befd vendor: update github.com/VictoriaMetrics/VictoriaLogs from v0.0.0-20260218111324-95b48d57d032 to v1.50.1-0.20260415114444-d5b5febe4954 2026-04-15 13:56:59 +02:00
Aliaksandr Valialkin
0f2e4a99e8 docs/Makefile: avoid creating a docker image with docs server at make docs-update-version
Just run a simple bash command without the heavyweight Docker image

While at it, rely on TAG environment variable instead of PKG_TAG env variable
for `make docs-update-version`, in order to be consistent with other Make commands.
2026-04-15 13:25:58 +02:00
f41gh7
9908e54c00 docs: remove promscrape.dropOriginalLabels from relabeling-debug section
Follow-up for ef507d372b.

 It's no longer necessary to manually set the promscrape.dropOriginalLabels
 flag, since it has the False value by default.
2026-04-15 12:34:21 +02:00
Pablo (Tomas) Fernandez
2b5efec02d docs/guides: fix broken links (#10800)
Fix broken or moved links in guides.

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

(cherry picked from commit d3264bd78f)
2026-04-15 10:23:35 +02:00
hagen1778
0aaa5932c1 docs/articles: add new 3rd party article about stream aggregation
https://medium.com/airbnb-engineering/building-a-high-volume-metrics-pipeline-with-opentelemetry-and-vmagent-c714d6910b45
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 1f87faafec)
2026-04-15 10:23:34 +02:00
hagen1778
8d58021f20 docs/vmagent: move relabeling section higher
The change is needed to group splitting/sharding section of the documentation,
so they go one after another. This should improve readability.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 521b73dfc5)
2026-04-15 10:23:34 +02:00
hagen1778
5f7d592237 docs/vmagent: mention ability to filter scrape targets
The previous description didn't mention that relabeling can be used
for filtering scrape targets. Adding this mention.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 61db79c10a)
2026-04-15 10:23:34 +02:00
hagen1778
fd50fdd9f9 docs/relabeling: restore links to articles about relabeling internals
These links were removed in 134501bf99
without adding complete substitution to their content.

Restoring these links as they can be useful for readers to learn about relabeling.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 460ac6468c)
2026-04-15 10:23:34 +02:00
hagen1778
8ccd44c8cf docs/playgrounds: add aliases for old links
The old links were removed in #10754
mistakenly thinking that google didn't index them. However, it did. And users can get 404
when searching in google for VM playgrounds.

Restoring the links via aliases. It means hugo will serve the `/playgrounds` page when
user requests `/playgrounds/victoriametrics/`.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit c42023c586)
2026-04-15 10:23:33 +02:00
Artem Fetishev
65efc5b4c3 apptest: sync code between branches and fix backup/restore range queries (#10799)
Fix app tests:

1. Sync code between vmsingle and vmcluster: it must be the same because
apptest does not differentiate between branches, it just runs pre-built
binaries
2. Simplify range queries in backup/restore test so that it does not
depend on the interval between samples to work correctly.

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-14 07:19:29 +02:00
Max Kotliar
cccdb0e3a0 docs/changelog: fix unwanted release tag change
The tag v1.138.0 was unintentionally changed to v1.139.0 due to a bug in the
release script.

Reverting the change. The bug will be addressed separately.
2026-04-13 14:52:46 +03:00
f41gh7
dd66ac1cef docs: update flags with actual v1.140.0 binaries
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-04-13 11:34:09 +02:00
f41gh7
9660e3914c docs: bump version to v1.140.0
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-04-13 11:31:51 +02:00
f41gh7
82c25eb290 docs: mention new LTS releases
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-04-13 11:16:44 +02:00
Aliaksandr Valialkin
3ed18e9e55 app/vmauth/main.go: clarify comments for bufferedBody struct a bit
This is a follow-up for https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10677#discussion_r3064731250
2026-04-11 09:43:25 +02:00
Aliaksandr Valialkin
b6ad71a266 lib/protoparser/protoparserutil: fix encoding -> contentType in the description of the ReadUncompressedData function
This is a follow-up for the commit bed7cbd0a4
2026-04-11 09:01:51 +02:00
f41gh7
0d488a10e9 follow-up for d07c1c73d1
move bugfix into current release
2026-04-10 19:39:06 +02:00
Alexander Frolov
739c2a76cb lib/writeconcurrencylimiter: prevent deadlock at IncConcurrency
Previously (*writeconcurrencylimiter.Reader).Read() could permanently leak concurrency tokens from the -maxConcurrentInserts semaphore.
 
 Consider the following example:
* GetReader() acquires a token, then PutReader() unconditionally releases it.
* Read() calls DecConcurrency() before the underlying I/O and IncConcurrency() after it. If IncConcurrency() returns an error, Read() returns without holding a token.
* Each such failure permanently removes one slot from the concurrencyLimitCh semaphore. Slots leak one by one until the channel is fully drained, at which point DecConcurrency() blocks forever, deadlocking ingestion on vmstorage.

 This commit adds tracking for obtained tokens to the reader, which prevents possible token leakage.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10784
2026-04-10 19:39:06 +02:00
f41gh7
890e4ab715 CHANGELOG.md: cut v1.140.0 release 2026-04-10 17:02:39 +02:00
f41gh7
bac6e3aa36 make docs-update-version 2026-04-10 17:00:21 +02:00
f41gh7
d627aaa9a4 make vmui-update 2026-04-10 17:00:21 +02:00
Hui Wang
aa03657879 app/vmalert: adopt additional rule states in the list rules API
In grafana, the alert list panel can use VictoriaMetrics as datasource
and call `/api/v1/rules` api with [specific
states](https://grafana.com/docs/grafana/latest/alerting/fundamentals/alert-rule-evaluation/nodata-and-error-states/#alert-instance-states).
See
https://play-grafana.victoriametrics.com/d/febljk0a32qyoa/3e68cf3?orgId=1&from=now-1h&to=now&timezone=browser&var-prometheus_datasource=P4169E866C3094E38&var-jaeger_datasource=P14D5514F5CCC0D1C&var-victorialogs_datasource=PD775F2863313E6C7&var-service_namespace=$__all&var-service_name=checkout&refresh=5m&editPanel=40.
Some states are already defined in vmalert, although with different
names. Others, such as "recovering", are currently undefined.
This pull request adopts all these states, rather than fail the request.

Above panel request also uses the `matcher` param to filter rules.
However,
[prometheus](https://prometheus.io/docs/prometheus/latest/querying/api/#rules)
also does not support this parameter and simply ignore it, so I don't
think vmalert needs to support it now.

JFYI, the grafana [Alerting
page](https://play-grafana.victoriametrics.com/alerting) does not
include any of the mentioned `state` or `matcher` parameters in rule
listing requests to the datasource. Filtering is handled by the Grafana
frontend, so most users are not affected by partial support for
filtering in backend products.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10778
2026-04-10 16:48:18 +02:00
Phuong Le
6e38f4493d ci: remove automatic Codecov reporting from test workflow (#10780)
This removes automatic Codecov reporting from VictoriaMetrics CI. This
change keeps local coverage generation available, but removes automatic
PR noise (such as
[this](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10625#issuecomment-4084390659))
and unnecessary CI overhead.
2026-04-10 16:48:18 +02:00
f41gh7
f7f1ddd94f docs: remove reverted commit changelog
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-04-10 16:35:34 +02:00
Aliaksandr Valialkin
71d5c7b31f Revert "app/vmauth: align request body buffering flags"
This reverts commit b3c03c023c.

Reason for revert: the original logic was correct from the user's perspective:

- The -maxRequestBodySizeToRetry command-line flag controls the size of the request body,
  which could be retried on backend failure. The meaning of this flag wasn't changed after
  the introduction of the -requestBufferSize flag in the commit e31abfc25c
  (see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10309 )

- The -requestBufferSize flag controls the size of the buffer for reading request body
  before sending it to the backend and before applying concurrency limits.

These flags are independent from user's perspective. The fact that these flags share the implementation
shouldn't be known to the user - this is an implementation detail, which allows avoiding double buffering.

Both flags enable request buffering. If the user wants disabling of all the request buffering,
then both flags must be set to 0. That's why these flags are cross-mentioned in their -help descriptions.

Also the reverted commit had the following issues:

- It reduced the default value for the -requestBufferSize flag from 32KiB to 16KiB.
  The 32KiB value has been calculated and justified at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10309 .
  It shouldn't increase vmagent memory usage too much for typical workloads.
  For example, if vmagent handles 10K concurrent requests, then the memory overhead for the request buffering
  will be 10K*32KiB=320MiB. This is a small price for being able to efficiently handle 10K concurrent requests.

- It added a dot to the end of the https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering link
  in the description of the -requestBufferSize flag. This breaks clicking the link in some environments,
  since the trailing dot is considered as a part of the url.

- It added a superfluous whitespace in front of the 'Disabling request buffering' text inside the description
  for the -requestBufferSize flag.

- It introduced an unnecessary complexity to the user by mentioning that the zero value
  at -maxBufferSize disables buffering for request retries (these things must be independent
  from the user's perspective).

- It changed the bufferedBody logic in non-trivial ways, which aren't related to the original issue.
  If these changes are needed, then they must be justified in a separate issue and must be prepared
  in a separate pull request / commit.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10675
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10677
2026-04-10 15:56:22 +02:00
Aliaksandr Valialkin
30fc6e3fd6 docs/victoriametrics/Articles.md: add https://medium.com/airbnb-engineering/building-a-high-volume-metrics-pipeline-with-opentelemetry-and-vmagent-c714d6910b45 2026-04-10 13:29:17 +02:00
Max Kotliar
49d3582be5 docs/changelog: add thank you for bugfix contribution 2026-04-10 13:08:42 +03:00
Max Kotliar
ba9c24723a docs/changelog: add thank you for the contribution 2026-04-10 13:07:18 +03:00
Noureldin
f04851dbd9 lib/storage: fixes data race at startFreeDiskSpaceWatcher
Previously, Storage.table was initialized after startFreeDiskSpaceWatcher was called.
This created a potential data race condition: if openTable took a long time to complete
and freed disk space during that window, the free disk space watcher could read an
uninitialized (or partially initialized) Storage.table, leading to an invalid memory
address or nil pointer dereference panic.

This commit properly initializes s.isReadOnly state during storage start and
starts FreeDiskSpaceWatcher after openTable.

Bug was introduced in github.com/VictoriaMetrics/VictoriaMetrics/commit/27b958ba8bc66578206ddac26ccf47b2cc3e8101

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10747
2026-04-10 08:37:01 +02:00
Hui Wang
faf4fd240c app/vmalert: align group evaluation time with the eval_offset option
Align group evaluation time with the `eval_offset` option to allow users
to manage group execution more effectively by understanding the exact
time each group will be scheduled, particularly in cases of spreading
rule execution within a window, chaining groups, or debugging data delay
issue.

If the group evaluation takes less than the group interval, but the
initial evaluation combined with the additional restore operation
exceeds the group interval, the evaluation time will be gradually
corrected in subsequent evaluations, as the interval ticker schedule
remains unchanged.

For groups without `eval_offset`, this change also ensures that all
evaluations follow the interval. Previously, the gap between the first
and second evaluations was larger than the interval. And the
`eval_delay` continues to help prevent partial responses.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10772.
2026-04-10 08:37:00 +02:00
Aliaksandr Valialkin
7653b89442 docs/victoriametrics/vmauth.md: fix link to concurrency limiting chapter
The correct link must be https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting
instead of https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limits

The incorrect link has been introduced in the commit e31abfc25c
2026-04-09 19:38:04 +02:00
Max Kotliar
b6c5ddef50 lib/{osinfo,appmetrics}: Move vm_os_info metric code to lib/appmetrics package (#10776)
Follow-up commit for
211fb08028

Address @f41gh7 review comments:
- Move code from `lib/osinfo` to `lib/appmetrics`.
- Make the logic private.
- Use metrics.WriteGaugeUint64 func.
- Remove registration logic from `app/xxx/main.go`.
- Remove `lib/osinfo` package.
2026-04-09 18:35:24 +03:00
Artem Fetishev
b13ceceb1c lib/storage: reuse nextDayMetricIDs during the first hour of the day (#10704)
At 00:00 UTC the ingested samples start to have timestamps for the new
day (since the ingested samples are always recent). Even though there was a
next-day prefill of the per-day index during the last hour of the day,
some performance degradation is still possible.

For example, in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10698
it is manifested as `vminsert-to-vmstorage connection saturation` peaks
right after midnight.

Possible hypothesis why this is happening. At midnight,
currHourMetricIDs is empty and prevHourMetricIDs cannot be used because
it holds metricIDs for the previous day. So the ingestion logic hits
dateMetricIDsCache which may not have the metricID in its read-only
buffer and therefore should acquire lock to check its prev read-only
buffer or read-write buffer. Which creates lock contention and therefore
raises ingestion request latency.

A solution to this could be re-using the nextDayMetricIDs during the
first hour of the day. During this time, it is equivalent to
currHourMetricIDs.

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
Signed-off-by: Artem Fetishev <149964189+rtm0@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-04-09 16:41:41 +02:00
Max Kotliar
ce9e0520fc apptest: Improve TestSingleVMAgentDropOnOverload stability (#10774)
Previously, the test could fail on resource-constrained runners because
remoteWrite retry happens before the assertion in:

```
    waitFor(
        func() bool {
            return vmagent.RemoteWriteRequests(t, url1) == 1 &&
vmagent.RemoteWriteRequests(t, url2) == 1
        },
    )
```

Because of the retry, the metric jumps to two and the assertion is never satisfied.

The commit explicitly postpones retries so there is no race condition.

Failed  CI job:

https://github.com/VictoriaMetrics/VictoriaMetrics/actions/runs/24186679213/job/70593055140

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10774

<img width="1157" height="879" alt="Screenshot 2026-04-09 at 15 30 33"
src="https://github.com/user-attachments/assets/e170ae12-cf79-4501-a57b-fbd3612d31a0"
/>
2026-04-09 16:57:57 +03:00
Max Kotliar
1217939531 docs/changelog: cleanup follow-up on e1a9901654
e1a9901654
2026-04-09 15:05:16 +03:00
Max Kotliar
ea8f410fbb docs/changelog: cleanup. follow-up on 211fb08028 commit
211fb08028
2026-04-09 15:01:59 +03:00
JAYICE
d20e296816 introduce os kernel version information metric (#10746)
The commit introduces the `vm_os_info` metric, which is exposed by all VM binaries by default. It provides visibility into the operating system version on which VictoriaMetrics is running, helping with troubleshooting environment-specific issues, like known kernel or fs bugs.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10746

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-09 14:48:26 +03:00
Yury Moladau
1c87afeb08 app/vmui: generate CSV format using /api/v1/labels (#10771)
`Export query` button on `Raw Query` tab now fetches labels of executed query and composes export `format` based on that list of labels. It ensures that all query response labels are preserved in the CSV export. 

Also, commit removes the addition of the CSV header in the frontend. Now the header is added by the backend (see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10706).

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10667
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10771
Duplicate of: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10737

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: lawrence3699 <lawrence3699@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-09 14:18:19 +03:00
andriibeee
6ae0ca6f86 vmselect: add CSV header support for export/import (#10706)
Export (/api/v1/export/csv) now always writes a header row matching the requested format fields. Examples:

```
  # format=__timestamp__:unix_ms,__value__,job,instance
  __timestamp__:unix_ms,__value__,job,instance
  1704067200000,42.5,node,localhost:9090
```

Import (/api/v1/import/csv) gains auto-detection logic: the first row is skipped if any timestamp column fails timestamp parsing or any metric value column fails float parsing. If the first row is not detected as headers, it is parsed as data. This makes the import backward compatible. 

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10666
PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10706

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-09 14:01:01 +03:00
dependabot[bot]
1406b3caac build(deps): bump vite from 8.0.2 to 8.0.7 in /app/vmui/packages/vmui (#10761)
Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 8.0.2 to 8.0.7.

https://github.com/vitejs/vite/blob/v8.0.7/packages/vite/CHANGELOG.md

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-09 13:16:24 +03:00
Pablo (Tomas) Fernandez
7ec94d1d63 docs: update playground page (#10754)
This change reverts part of the changes in
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10686

Motivation: docs added in https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10686 are in most cases too verbose, AI-generated and of little practical value.

The improvement goal: remove bloat from the docs and keep them practical and useful.

What it does:
- Completely removes items from the sidebar
- Moves the content of the most important playground pages to the
`/playground/` stub (README.md). Use H2s for each playground.
- Updates and cleans the text.
- Removes the individual children pages in the playground category (keep
only the `/playgrounds/` page/stub and remove the children).
- Removes items as these don't really need much introduction or aren't
playgrounds:
  - log to logsql: a conversion tool
  - sql to logsql: same
- adds Grafana playground section

Links of child pages will become invalid. We don't preserve them as this is pretty new doc (1w on prod) and is unlikely to have already persisted links somewhere.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit cd3d297a3d)
2026-04-09 12:10:36 +02:00
f41gh7
0f36c2ba91 follow-up for 72c9e9377c
Move changelog entry to the upcoming release section
2026-04-09 11:39:16 +02:00
Hui Wang
0a0b2a50bd app/vmalert: expose remotewrite queue_size metrics
This commit adds new metrics `vmalert_remotewrite_queue_capacity` and `vmalert_remotewrite_queue_size`, which is updated with each push, and its
update frequency depends on `-remoteWrite.concurrency` and
`-remoteWrite.flushInterval`.

It doesn't account for the pending data within each pusher's request, but it
should provide a general indication of the queue usage.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10765
2026-04-09 11:23:09 +02:00
andriibeee
4d3c30483c lib/awsapi: add support for named AWS profile to ec2_sd_config
Add support for named AWS profiles in ec2_sd_config, matching Prometheus behavior.

Example:

```text
~/.aws/config:
[profile account-one]
source_profile = root
role_arn = arn:aws:iam::000000000001:role/prometheus
```

```yaml
scrape config:
- job: ec2
  ec2_sd_configs:
    - profile: account-one
```

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1685
2026-04-09 11:23:08 +02:00
f41gh7
43481ae63b vendor: run go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.26
go mod vendor
2026-04-09 09:34:38 +02:00
Artem Fetishev
318416db8a lib/storage: refactor storage synctests
Extract repeated code from nextDayMetricIDs synctests into separate
funcs to make the code more readable.

The change was originally introduced in
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10704 and was
extracted into a separate PR to keep the original change simple.
2026-04-09 09:11:42 +02:00
Zhu Jiekun
ceda0407fb app/vminsert: optimise per insert request memory buffer size
Previously, vminsert did not account for the ingest concurrency limit in buffer size calculation.
This could lead to excessively large buffers and OOM errors when the concurrency limit was reached.

 This commit fixes buffer size calculation by separating `insertCtx` and `storageNode` buffer size limits.

`storageNode` buffer size is set to a larger value, as it is allocated per configured `-storageNode`
and is independent of the concurrency limit.

`insertCtx` buffer size now accounts for the configured concurrency limit
and calculates the maximum buffer size accordingly.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10725
2026-04-09 08:47:18 +02:00
JAYICE
514540c9b7 app/vmselect: disable partial responses for cluster native requests
Previously, vmselect in cluster-native mode could return partial responses to upstream vmselect.
Since upstream vmselect expects full responses (mimicking vmstorage behavior),
partial responses must be disabled in cluster-native mode.
This prevents incomplete responses from being cached at the upstream vmselect level.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10678
2026-04-09 08:37:50 +02:00
0e4ef622
573ae368cb docs/victoriametrics/stream-aggregation: fix rate_sum link (#10756)
### Describe Your Changes

https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8349 updated the recommendation for histogram aggregation from `total` to `rate_sum`, but missed one of the links.

PR: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10756

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-04-08 13:10:36 +03:00
Zakhar Bessarab
9eb1e02171 app/vmagent/remotewrite: automatically set series limits to MaxInt32 when setting value to -1 (#9614)
Automatically set daily and hourly series limits to `MaxInt32` when `remoteWrite.maxHourlySeries` or `remoteWrite.maxDailySeries` is set to `-1`.

This change addresses a usability issue with the cardinality limiter. Users may want to enable the limiter to observe its metrics before deciding on an appropriate limit. However, the underlying bloom filter only supports `int32`, so setting large values can lead to overflow.

With this PR:
* Setting either flag to `-1` is treated as “no practical limit” and internally mapped to `math.MaxInt32`
* Values exceeding `int32` are safely clamped to `MaxInt32` to prevent overflow

This allows users to enable the limiter for estimation purposes without risking invalid configurations or runtime issues.

https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9614

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-04-08 13:03:34 +03:00
Max Kotliar
26568f00eb deployment/docker: update Go builder from Go1.26.1 to Go1.26.2
See
https://github.com/golang/go/issues?q=milestone%3AGo1.26.2%20label%3ACherryPickApproved
2026-04-08 12:44:30 +03:00
JAYICE
a8c5db6b09 lib/promscrape: update last scrape result only when current scrape is successful
Previously, last scrape result was unconditionally updated, despite possible scrape error.

The commit updates last scrape result only at successful scrape. It properly accounts `scrape_series_added` metric and aligns it with the same metric in Prometheus.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10653
2026-04-06 17:17:52 +02:00
Nikolay
f261a4a453 app/vmauth: align request body buffering flags
Previously introduced flag `requestBufferSize` raised default value for
in-memory buffer from 16KB to 32KB. It could increase memory usage for
vmauth. Also it made it unclear how to actually disable requests buffering.

 This commit aligns the default values of both flags to 16KB and disables request
buffering if either flag is set to 0, as mentioned in the flag descriptions.
If either flag has a non-default value, that value is used as the max size
for the request buffer. If both flags are modified, the bigger value wins.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10675
2026-04-06 09:53:34 +02:00
Hui Wang
99ec1f0da7 app/vmalert: add random jitter to concurrent periodical flushers targeting the remote write destination
I expect the change to help in two ways:
1. Spreading remote write flushes over the flush interval to avoid
congestion at the remote write destination;
2. Enhance queue data consumption. Currently, all flushers may always
flush data simultaneously, resulting in periods where no flushers are
consuming data from the queue, which increases the risk of reaching the
queue limit `remoteWrite.maxQueueSize` even with an increased
`remoteWrite.concurrency`. By making the flushers more dispersed, it is
more likely that some flushers are consistently consuming data from the
queue, which should make queue management easier.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10729/
2026-04-06 09:53:34 +02:00
Hui Wang
dba05bd4b6 app/vmalert: expose new histograms to provide better visibility into remote write request sizes
The new histograms should help with debugging whether remote write
pushes are efficient(pushes can be underutilized due to small flush
interval), like in
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10693 and
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10536. This
enhanced visibility will allow related parameters such as
`-remoteWrite.maxBatchSize`, `-remoteWrite.maxQueueSize`,
`-remoteWrite.flushInterval` to be tuned accordingly.

Eventually, `vmalert_remotewrite_sent_rows_total`
and `vmalert_remotewrite_sent_bytes_total` could be deprecated, but it's also fine to leave
them as they are since they're small counters.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10727
2026-04-06 09:53:33 +02:00
sias32
460267103f dashboards: improve alert statistics (#10571)
Changes:

- Added the number of `pending alerts` and `firing alerts`
- Improved `transformations` for panel - FIRING over time by group and rules
- Added sort for panel - FIRING over time by rule

Signed-off-by: sias32 <sias.32@yandex.ru>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-03 21:30:06 +03:00
Max Kotliar
a54072d054 docs/guide: fix free space calculation factor in capacity planning formula
Replace 1.2 multiplier with 1.25 in disk space estimation formula.

1.2 only provides ~16.7% free space, while the docs recommend keeping
20%. Using 1.25 correctly accounts for 20% free space.

Inspired by
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10394
2026-04-03 21:20:36 +03:00
Zhu Jiekun
a0c11a7ca4 vendor: update metrics package with fix unsupported metric type for summary (#10745)
Fix `unsupported` metric type display in exposed metric metadata for
summaries and quantiles by bumping `metrics` SDK version.

This `unsupported` type exists when a summary is not updated within a
certain time window. See https://github.com/VictoriaMetrics/metrics/issues/120 and pull
request https://github.com/VictoriaMetrics/metrics/pull/121 for details.

Signed-off-by: Zhu Jiekun <jiekun@victoriametrics.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-04-03 16:07:03 +03:00
Max Kotliar
2d6f85ac69 .github: Set 21-day cooldown to dependabot updates (#10740)
Recent supply chain attacks on GitHub Actions and npm packages show the
risk of pulling dependency updates too quickly:
-
https://socket.dev/blog/trivy-under-attack-again-github-actions-compromise
-
https://www.stepsecurity.io/blog/axios-compromised-on-npm-malicious-versions-drop-remote-access-trojan
2026-04-03 15:51:33 +03:00
Evgeny
36c0adb7b8 app/vmagent: add per-URL -remoteWrite.disableMetadata
Add per-URL `-remoteWrite.disableMetadata` flag to control metadata
sending for each remote storage independently.

After v1.137.0 enabled `-enableMetadata` by default, metadata is sent to
ALL remote write targets, even those with relabeling filters that drop
most metrics. This causes unnecessary growth in
`vmagent_remotewrite_requests_total` and a significant increase in
network load for heavily filtered remote write destinations.
2026-04-03 10:33:26 +02:00
Roman Khavronenko
c9f18bd543 app/vmauth: mention that vmauth can be used with other components
A cosmetic change to highlight that vmauth can be used with other
components besides VM only
2026-04-03 10:33:26 +02:00
JAYICE
b222ba6a78 lib/backup/s3: retry the requests that failed with unexpected EOF
When the network between client and s3 server is unstable, the client may encounter temporary io.EOF errors when reading the response from s3 server.
Currently, the s3 sdk in vmbackup uses the default retry policy. However, this default retry policy won't retry when the s3 sdk encounters an unexpected EOF. This means that the temporary unexpected EOF error will cause the backup task to fail.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10699
2026-04-03 10:33:25 +02:00
Fred Navruzov
1109fe0d97 docs/vmanomaly: v1.29.2 (#10741)
update docs to vmanomaly v1.29.2 release

Signed-off-by: Fred Navruzov <fred-navruzov@users.noreply.github.com>
2026-04-02 22:03:02 +03:00
Roman Khavronenko
78a6f18593 docs: mention https://victoriametrics.com/blog/victoriametrics-remote-write/ (#10726)
Add link to blogpost with detailed information about zstd+rw protocol.
This PR is based on question in community channel about implementation
details.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-04-02 16:30:29 +03:00
Max Kotliar
78ebf28523 docs/changelog: add thank for contribution 2026-04-02 15:58:37 +03:00
Max Kotliar
925916c661 vendor: update https://github.com/VictoriaMetrics/metricsql from v0.85.0 to v0.86.0
It contains https://github.com/VictoriaMetrics/metricsql/pull/63 that
reduces the number of parentheses added.

It should improve prettify functionality in vmui
2026-04-02 15:42:18 +03:00
Aliaksandr Valialkin
33e9362aa0 vendor: update github.com/VictoriaMetrics/metrics from v1.42.0 to v1.43.0 2026-04-02 14:18:45 +02:00
Dmytro Kozlov
569bd1fd7b vmctl: add thanos migration mode (#10659)
Implemented dedicated thanos migration mode for vmctl to migrate data from Thanos installations to VictoriaMetrics.

Key features:
1. Raw and downsampled blocks support: Reads both raw blocks
(resolution=0) and downsampled blocks (5m/1h resolution) directly from
Thanos snapshots
2. All aggregate types: Imports count, sum, min, max, and counter
aggregates from downsampled blocks as separate metrics with resolution
and type suffixes (e.g., metric_name:5m:count)
3. Dedicated flags: Uses `--thanos-*` prefixed flags (--thanos-snapshot,
--thanos-concurrency, --thanos-filter-time-start,
--thanos-filter-time-end, --thanos-filter-label,
--thanos-filter-label-value, --thanos-aggr-types)
4. Selective aggregate import: Use `--thanos-aggr-types` to import only
specific aggregates

Usage:
```
vmctl thanos --thanos-snapshot /path/to/thanos-data --vm-addr http://victoria-metrics:8428
```

Closes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9262

Signed-off-by: Dmytro Kozlov <d.kozlov@victoriametrics.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: Max Kotliar <kotlyar.maksim@gmail.com>
2026-04-02 14:53:24 +03:00
Aliaksandr Valialkin
50f39cb9aa docs/victoriametrics/changelog/CHANGELOG.md: add a description for the change in the commit dd2d6807e4 2026-04-02 13:18:56 +02:00
Mehrdad Banikian
9b4c75c56c Add split phase metrics for filestream fsync operations (#10493)
## Summary

This PR implements split phase metrics for filestream operations as
requested in #10432.

### Changes

- Added `vm_filestream_fsync_duration_seconds_total` metric to track
fsync syscall duration separately
- Added `vm_filestream_fsync_calls_total` metric to count fsync calls
- Added `vm_filestream_write_syscall_duration_seconds_total` metric to
track write syscall duration (previously mixed with flush time)
- Refactored `MustClose()` and `MustFlush()` to use new `flush()` and
`sync()` helper methods
- Kept `vm_filestream_write_duration_seconds_total` for backward
compatibility

### Problem Solved

Previously, `vm_filestream_write_duration_seconds_total` was being
incremented in two places:
1. `statWriter.Write()` - triggered by `bw.Flush()` and `bw.Write()`
2. `Writer.MustFlush()` - which included the above process, leading to
double-counting

This made it impossible to distinguish between write syscall time and
fsync time, which is critical for diagnosing storage latency issues.

### Solution

The new metrics allow users to:
- Distinguish "flush got slower" vs "fsync got slower" using metrics
only
- No file path labels (bounded cardinality)
- No double-counting between metrics

### Testing

- Code compiles successfully
- All existing metrics are preserved for backward compatibility

Closes #10432

---------

Signed-off-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
Signed-off-by: Aliaksandr Valialkin <valyala@gmail.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com>
2026-04-02 13:18:55 +02:00
Aliaksandr Valialkin
b354cba896 app/vmagent/remotewrite: improve the readability of the parseRetryAfterHeader() function a bit
- Use shorter name for its arg: retryAfterString -> s. This is OK to do because the function is small enough,
so it is easier to read 's' instead of 'retryAfterString' in multiple places of the function.

- Remove the name for the returned value - retryAfterDuration, since it only confuses the reader.

This is a follow-up for the commit 5319acb8ed , which introduced this function.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6124
2026-04-02 12:56:58 +02:00
Vadim Alekseev
2c670618ce lib/timeutil: introduce backoff timer struct (#10714)
### Describe Your Changes

I noticed that the backoff timer logic is repeated across multiple
packages. I've implemented a universal wrapper to avoid duplicating this
logic. This structure is already [actively
used](2aa0ea10bb/app/vlagent/kubernetescollector/backoff_timer.go (L11))
for the Kubernetes Collector in vlagent and can be reused in vlagent's
remotewrite. I've also included a usage example in this PR so you can
evaluate its utility.

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-04-02 12:32:24 +02:00
Aliaksandr Valialkin
3cf2b33ee0 docs/victoriametrics/Articles.md: add https://mirastacklabs.ai/blog/chunk-split-caching/ 2026-04-01 22:42:23 +02:00
Aliaksandr Valialkin
734234a47e lib/storage: remove MetricNamesStatsResponse and MetricNamesStatsRecord types
These types hide public types from lib/storage/metricnamestats package.
These types do not resolve any practical issues. Instead, they add a level of indirection,
which complicates reading and understanding the code.

These types were introduced in the commit 795d3fe722
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6145
2026-04-01 22:37:43 +02:00
Aliaksandr Valialkin
05766308c6 apptest: test apps with the enabled built-in race detector in order to be able to catch data races 2026-04-01 22:16:00 +02:00
Artem Fetishev
c9aa974322 docs: bump version to v1.139.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-01 15:22:10 +02:00
Artem Fetishev
6b22bdb9f3 deployment/docker: bump version to v1.139.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-01 15:19:09 +02:00
Artem Fetishev
916087eca9 docs: forward port LTS v1.122.18 changelog to upstream
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-01 14:24:25 +02:00
Artem Fetishev
bd78ef97ca docs: forward port LTS v1.136.3 changelog to upstream
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-01 14:23:17 +02:00
Artem Fetishev
b33a91a64b docs: fix changelog
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-04-01 13:20:47 +02:00
Pablo (Tomas) Fernandez
87088a1c80 docs/guides: Add new guide "Set up Datasource-Managed Alerts with vmalert and Grafana" (#10691)
Create a guide to use datasource-managed alerts in Grafana

See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10528

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Mathias Palmersheim <mathias@victoriametrics.com>
2026-03-31 18:57:35 +03:00
Jose Gómez-Sellés
e3735331ab docs: raise cloud awareness in docs (#10716)
### Describe Your Changes

Some users may not know that VictoriaMetrics Cloud provides relevant
features to manage workloads. This change add notes in relevant places
in which users may find that a managed solution is what they need.

The intention is not to push users to Cloud, but giving the information.
That's why it's always phrased like: "If you don't want to do X, Cloud
can do it for you", instead of "Start for free, etc". This is an Open
Source first project, and shall remain as such.

After this gets proper review, VictoriaLogs and other repos may follow.

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Jose Gómez-Sellés <14234281+jgomezselles@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-31 18:51:41 +03:00
Max Kotliar
6b8ea1eedc docs: bump lts tags 2026-03-28 15:50:06 +02:00
Artem Fetishev
26078b9777 docs/changelog: cut release v1.139.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-27 11:28:35 +01:00
Artem Fetishev
e33a49e04b docs: update version to v1.139.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-27 11:27:38 +01:00
Artem Fetishev
37caf636cd app/vmselect: run make vmui-update
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-27 11:18:29 +01:00
Nikolay
f6cef6e6be lib/fs: restore async deletion of NFS folders
Commit 83da33d8cf
 removed NFS directory delete retries. It was made on assumption, that
 only directory rename could cause such issues. However, both rename and
unlink uses the same "silly rename" logic
https://linux-nfs.org/wiki/index.php/Server-side_silly_rename
 and linux kernel - `fs/nfs/dir.c` `nfs_unlink` and  `nfs_rename`.

 And NFS client may treat file still open, even if it
was properly closed by application. Most probably it could be triggered, because VictoriaMetrics may
open the same file multiple times ( data read and background merges).

There is no issue with VictoriaMetrics itself, it properly closes files. But NFS-client may have delays
or cache metadata information for the files. So it could trigger silly rename behavior.

 This commit restores original behavior with deletion retries and brings
 back metrics for unsuccessful delete operations.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842
2026-03-27 09:53:01 +01:00
dependabot[bot]
71c5ded0f5 build(deps): bump codecov/codecov-action from 5 to 6 (#10709)
Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 5 to 6.

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-27 07:43:42 +02:00
Benjamin Nichols-Farquhar
142704d382 lib/backup: speed up restores on linux systems (#10661)
Related to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10680

We noticed that backup restores in our environment were much slower than
the hardware/bandwidth constraints would suggest and we traced this down
to a couple of bottlenecks. This PR attempts to address all of them.

#### Lack of pre-allocation of files, 

This was causing writes far into files to be quite slow as new blocks
needed to be continually allocated. This was particularly bad on ext4
for us, but will likely be applicable to most disks and filesystems,
you'll see the impl here is linux specific but this is mostly because I
don't have a test env for any other platform and didn't want to blindly
make changes without a validation env.

This comes with the downside of no longer being able to resume a restore
mid file, and requiring the re-downloading of parts already in the file
since the file will appear at full size from the very start. This is I
think _generally_ a good tradeoff for the restore speed gains, it is
definitely a tradeoff so I've included a flag to disable the
pre-allocation behavior and fall back to the existing part diffing
logic.

#### Fsync after each part

With many small parts in relatively few files, or in high concurrency
setups, the writerCloser fsync on each part (actually double fsync
since `filestream.Writer.mustFlush` and
`filestream.Writer.mustClose` both fsync) was causing slowdowns since
we would be continually queuing fsyncs.

With the pre-allocation pattern the file is only "ready" once re-named
so I moved to a per file fsync after rename.

#### Concurrent read/write 

The previous download pattern was to do a read from the remoteFs, with
whatever latency that entailed, then sequentially do a write, again with
whatever latency that entailed. This meant that throughput was limited
to `readLatency + writeLatency * blockSize`.

Similar to how `crossTypeCopy` is implemented in the backup process we
can instead use `io.pipe` to allow two goroutines to work in parallel
with a small buffer between them.

#### Pagecache avoidance 

`filestream.Writer` does quite a lot to avoid polluting the page cache,
but this is not relevant in a restore context and with large sequential
block writes it's much more efficient to let the OS flush the pagecache
whenever it wants rather than doing a bunch of small buffer syscalls to
flush blocks.

Therefore this switches over to a much simpler directWriterCloser that
does direct file IO and lets the OS handle flushes while mid write.

### Performance 

Before the changes we were seeing writes speeds of only 100MBps, this
was a restore from EBS volumes, ext with 1GB/s throughput with
<img width="1613" height="586" alt="Screenshot 2026-03-16 at 1 29 46 PM"
src="https://github.com/user-attachments/assets/5d54dcb7-cb59-43e0-9247-fda8c70feb2f"
/>


After these changes in the same restore env we're seeing 600MBs flat
rates.
<img width="1611" height="471" alt="Screenshot 2026-03-16 at 1 31 33 PM"
src="https://github.com/user-attachments/assets/ea8e2eb7-533a-48fa-99e0-0b38286e5572"
/>

Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-27 07:36:03 +02:00
Fred Navruzov
c7065b960c docs/vmanomaly: fix typos in min rel dev param (#10708)
Fix a typo in the changed example for minimal relative deviation
2026-03-27 07:24:49 +02:00
Fred Navruzov
cbba08b25f docs/vmanomaly-release-v1.29.1 (#10707)
some of the docs not included in v1.29.1 docs' release

Signed-off-by: Fred Navruzov <fred-navruzov@users.noreply.github.com>
2026-03-26 22:32:57 +02:00
Max Kotliar
4aa212feb5 .github: check commit signature for both GPG and SSH 2026-03-26 19:37:56 +02:00
Artem Fetishev
ff0e43786c lib/lrucache: remove shards (#10697)
Remove shards as they only complicate things when the number of requests
per second is in the range of thousands.

Related to #10532.

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-26 16:31:51 +01:00
Phuong Le
12ae6fd596 README.md: fix wrong links for Docker and Slack badges (#10705)
### Describe Your Changes

Clicking the Docker and Slack badges redirects to an intermediate page
instead of taking users directly to the intended sites. This change
fixes those links.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-26 12:05:22 +02:00
Hui Wang
7bf71f3817 lib/protoparser/opentelemetry: support ExponentialHistogram negative buckets (#10669)
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9896#issuecomment-4037424586.
Histogram-related functions such as histogram_quantile() and the VMUI
heatmap also work with negative bucket values.

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-26 11:57:43 +02:00
Max Kotliar
9232dd5be3 docs/changelog: mention external contributor 2026-03-26 11:54:57 +02:00
andriibeee
a66bbb85ae lib/jwt: support array claim values in match_claims
This commit allows performing JWT claim matching over one-dimensional arrays. It could
be useful from practical standpoint. Because permissions are usually assigned as a list of values.

  For example, the following config allows admin access over list of assigned roles for user:

```yaml
 match_claims:
   access.roles: "admin"
```

JWT token:
```json
 {
  "access": {
    "roles": [
      "read",
      "write",
      "admin"
   ]
 }
}
```

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10647
2026-03-26 10:24:58 +01:00
andriibeee
b8143b59fc lib/promauth: make username optional in basic_auth section
RFC-7617 allows empty password/username. Moreover, from RFC standpoint both empty values are valid as well. It should be just encoded as `:`. So this commit relaxes non-empty username restriction.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6956
2026-03-26 10:24:57 +01:00
Yury Moladau
16a7c08fdb app/vmui: update dependencies to latest compatible versions (#10696)
update dependencies to latest compatible versions

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
2026-03-25 19:35:14 +02:00
Fred Navruzov
856c0ddc6f docs/vmanomaly: release v1.29.1 (#10703)
### Describe Your Changes

vmanomaly docs upgrade to v1.29.1 (including AI assistance providers and
respective section rework on UI page)

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-03-25 19:31:38 +02:00
Ty Sarna
4d3298d3c6 lib/protoparser/prometheus: add support for OpenMetrics-specific metric types (#10689)
- Adds `info`, `gaugehistogram`, `stateset`, and `unknown` as recognized
metric type names in the Prometheus/OpenMetrics text format parser.
- Previously these valid
[OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md)
types hit the `default` case and emitted an `error`-level log on every
scrape, flooding logs and continuously triggering the `TooManyLogs`
alert.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10685

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-24 15:34:54 +02:00
Pablo (Tomas) Fernandez
26991514fa docs: add playgrounds category (#10686)
Add a new Playgrounds category to the sidebar. Each VictoriaMetrics playground is represented in a separate file.

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Max Kotliar <kotlyar.maksim@gmail.com>
2026-03-24 15:25:26 +02:00
Max Kotliar
d773190c49 app/vmselect: enforce datasource_type=prometheus when proxying alert requests (#10668)
Grafana currently supports only Prometheus-style alerts. If other alert types
(e.g. logs or traces) are returned, it may fail with "Error loading alerts".

Grafana queries the vmalert API directly, bypassing the VictoriaMetrics datasource, so query params (such as datasource_type) cannot be enforced on the Grafana side.

To ensure compatibility, we detect Grafana requests via the User-Agent and enforce `datasource_type=prometheus`.

See:
- https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329#issuecomment-3847585443
- https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/59
2026-03-24 14:52:37 +02:00
Artem Fetishev
de91b8370e lib/lrucache: sizeBytes should also include key length (#10679)
There are cases when the key sizeBytes is much greater than the value
sizeBytes. Therefore it is important to include the key sizeBytes into
the total.

Also fix some code comments.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-24 12:58:11 +01:00
Artem Fetishev
e12b2dca5f lib/storage: Improve indexDB error messages (#10684)
Fixes: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9499

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
Signed-off-by: Nikolay <nik@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-24 12:29:32 +01:00
dependabot[bot]
ac13e7cc6c build(deps-dev): bump flatted from 3.3.3 to 3.4.2 in /app/vmui/packages/vmui (#10688)
Bumps [flatted](https://github.com/WebReflection/flatted) from 3.3.3 to
3.4.2.
<details>
<summary>Commits</summary>
<ul>
<li><a
href="3bf09091c3"><code>3bf0909</code></a>
3.4.2</li>
<li><a
href="885ddcc33c"><code>885ddcc</code></a>
fix CWE-1321</li>
<li><a
href="0bdba705d1"><code>0bdba70</code></a>
added flatted-view to the benchmark</li>
<li><a
href="2a02dce7c6"><code>2a02dce</code></a>
3.4.1</li>
<li><a
href="fba4e8f2e1"><code>fba4e8f</code></a>
Merge pull request <a
href="https://redirect.github.com/WebReflection/flatted/issues/89">#89</a>
from WebReflection/python-fix</li>
<li><a
href="5fe86485e6"><code>5fe8648</code></a>
added &quot;when in Rome&quot; also a test for PHP</li>
<li><a
href="53517adbef"><code>53517ad</code></a>
some minor improvement</li>
<li><a
href="b3e2a0c387"><code>b3e2a0c</code></a>
Fixing recursion issue in Python too</li>
<li><a
href="c4b46dbcbf"><code>c4b46db</code></a>
Add SECURITY.md for security policy and reporting</li>
<li><a
href="f86d071e0f"><code>f86d071</code></a>
Create dependabot.yml for version updates</li>
<li>Additional commits viewable in <a
href="https://github.com/WebReflection/flatted/compare/v3.3.3...v3.4.2">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=flatted&package-manager=npm_and_yarn&previous-version=3.3.3&new-version=3.4.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/VictoriaMetrics/VictoriaMetrics/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-24 12:39:15 +02:00
dependabot[bot]
6911e85a01 build(deps): bump google.golang.org/grpc from 1.79.1 to 1.79.3 (#10674)
Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.79.1 to 1.79.3.

See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10674

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-24 11:37:37 +02:00
dependabot[bot]
ba9adc3967 build(deps): bump undici from 7.20.0 to 7.24.4 in /app/vmui/packages/vmui (#10673)
Bumps [undici](https://github.com/nodejs/undici) from 7.20.0 to 7.24.4.

See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10673

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-24 11:36:13 +02:00
Aliaksandr Valialkin
7c5e1df9f2 docs/victoriametrics/Articles.md: add https://www.infoq.com/news/2026/03/self-hosted-observability/ 2026-03-20 18:39:17 +01:00
andriibeee
8b22e1aed9 lib/netutil: warn when IPv6 listen address is used without -enableTCP6 (#10640)
### Describe Your Changes

Fixes #6858

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: andriibeee <154226341+andriibeee@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-18 21:02:17 +02:00
andriibeee
a58510c06a lib/jwt: fail on unsupported alg when use=sig, skip non-sig JWKS keys (#10664)
### Describe Your Changes

Fixes #10663

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-18 20:41:21 +02:00
hagen1778
e5c337e6e9 docs/articles: add https://setevoy.medium.com/freebsd-monitoring-with-victoriametrics-and-grafana-f789904f2628
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit fd0d764720)
2026-03-18 16:04:45 +01:00
Roman Khavronenko
40a821f740 docs: add unique identifier to FAQ page (#10671)
Due to a conflict with VL FAQ page identifier,
VM FAQ page stopped rendering.

This change adds unique identifier to VM FAQ page and fixes the issue.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit fe8aaa8885)
2026-03-18 15:32:41 +01:00
hagen1778
68e8717a4d docs/data-ingestion: fix typo in OtelCollector
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit b903fc29ec)
2026-03-18 15:32:41 +01:00
hagen1778
00b1ca9628 dashboards/metrics-explorer: properly reference datasource variable
Before, by mistake, datasource was referenced by input name instead
of variable name. For an unknown reason, it worked well in local setup
and on playground.

This fix is confirmed by users and continues working at local setup
and playground.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit a6833ffd08)
2026-03-18 15:32:41 +01:00
Andrii Chubatiuk
c472459922 app/vmalert: add group_limit and page_num for pagination and search for search at /api/v1/rules (#10046)
### Describe Your Changes

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9580

inspired by https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9057

improve https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10005

added changes to support pagination in VMUI alerting tab:
- added pagination panel
<img width="1431" height="197" alt="image"
src="https://github.com/user-attachments/assets/17b2c4e1-06b7-4345-8ccc-008637edd4e0"
/>
- added navigation from group modal to rule and from child modals to
group as a replacement for anchors navigation, which became impossible
after introduction of pagination
<img width="1264" height="599" alt="image"
src="https://github.com/user-attachments/assets/a803347f-e44e-4325-9b59-8656bd6a5d9b"
/>
<img width="1253" height="523" alt="image"
src="https://github.com/user-attachments/assets/70db27bd-0027-4510-9cad-0354e016d2f2"
/>


PR is rebased against [this
change](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10068)

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Andrii Chubatiuk <achubatiuk@victoriametrics.com>
Co-authored-by: Haley Wang <haley@victoriametrics.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-18 13:26:02 +02:00
Pablo (Tomas) Fernandez
b192727653 Docs: update guide "HA monitoring setup in Kubernetes via VictoriaMetrics Cluster" (#10580)
### Describe Your Changes

Updated the [HA monitoring setup in Kubernetes via VictoriaMetrics
Cluster](https://docs.victoriametrics.com/guides/k8s-ha-monitoring-via-vm-cluster/)
guide.

Changes:
- Added an introduction explaining how HA works in this guide
- Updated and verified commands used in the guide
- Replaced Grafana UI usage with VMUI (Grafana was only
used to run queries, so it's easier to use the built-in VMUI instead
of installing Grafana just to use the Explore tab)
- Removed Grafana screenshots and replaced them with VMUI
- Tested on a modern version of GKE
- Added explanations for `replicationFactor`, de-duplication, and
`isPartial`
- Added next steps
- Added VMUI screenshots


### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-03-18 10:58:39 +02:00
Yury Moladau
9992098b76 app/vmui: fix autocomplete dropdown closing on Raw Query page (#10665)
### Describe Your Changes

Fixed an issue where the autocomplete dropdown did not close after
selecting an option on the Raw Query page.

**How to reproduce**

* Open the Raw Query page
* Trigger autocomplete
* Select any option from the dropdown
* Before the fix, the dropdown stayed open after selection


### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-18 10:46:44 +02:00
Aliaksandr Valialkin
3d4aba6cbb docs/victoriametrics/Articles.md: add https://clovisc.medium.com/monitoring-pipeline-with-blackbox-exporter-prometheus-victoriametrics-and-vmalert-0ab020c7202a 2026-03-18 02:42:58 +01:00
Aliaksandr Valialkin
6bf5e89975 docs/victoriametrics/Articles.md: add https://apprecode.com/blog/a-complete-guide-to-victoriametrics-a-prometheus-comparison-and-kubernetes-monitoring-implementation 2026-03-18 02:41:40 +01:00
JAYICE
d32dec117d app/vmselect: retry with new connection when previous RPC fails on a broken connection
This commit adds an RPC retry that dials a new connection instead of
getting an old one from the connection pool when the previous RPC error
is `io.EOF`.

It helps prevent broken connections from remaining for too long and
causing failed requests and partial responses during a `vmstorage` rolling
restart.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10314
2026-03-17 10:57:13 +01:00
andriibeee
baadc2b8cd lib/storage, lib/mergeset: properly account inmemoryPart refCount
Previously inmemoryPart refCount was not properly decremented.

Previous behavior:
* createInmemoryPart calls newPartWrapperFromInmemoryPart and returns a partWrapper with refCount=1
* multiple parts are merged in mustMergeInmemoryPartsFinal, which creates a new merged part
* the source partWrappers are never decRef'd
* since refCount never reaches 0, putInmemoryPart and (*part).MustClose are never called

This commit properly decrements refCount in mustMergeInmemoryPartsFinal.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10086
2026-03-17 10:54:42 +01:00
Br1an
d487a42e47 lib/promscrape/discovery/yandexcloud: add folder_ids option
This commit adds a new `folder_ids` field in
`yandexcloud_sd_configs` that allows users to specify Yandex Cloud
folder IDs directly, bypassing the organization->cloud->folder hierarchy
traversal.

Previously, the Yandex Cloud service discovery required traversing the
entire resource hierarchy (organizations -> clouds -> folders ->
instances) to discover instances. This works when the Service Account
has permissions at all levels. However, some Service Accounts may only
have permissions at the folder level, causing discovery to fail when it
cannot access organization or cloud resources.

With this change, users can now configure folder IDs directly:

```yaml
yandexcloud_sd_configs:
  - service: compute
    folder_ids:
      - folder-id-1
      - folder-id-2
```

When `folder_ids` is specified, the discovery skips the hierarchy
traversal and directly queries instances from the specified folders.
This is a backward-compatible change - when `folder_ids` is not
specified, the existing behavior is preserved.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10587
2026-03-17 10:54:41 +01:00
Artem Fetishev
bd8b08f466 docs: run make docs-update-flags
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-16 17:00:13 +01:00
Artem Fetishev
3722597e63 docs: bump version to v1.138.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-16 16:54:26 +01:00
Artem Fetishev
f38a4eb4ec deployment/docker: bump version to v1.138.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-16 16:49:06 +01:00
Artem Fetishev
465b0c4e5b docs/CHANGELOG.md: update changelog with LTS release notes
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-16 15:23:32 +01:00
Arie Heinrich
2abc3cf548 all: spelling fixes in code comments (#10650)
fixing spelling issues in comments and text strings

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

(cherry picked from commit 14090c5a07)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-03-16 11:14:47 +01:00
Arie Heinrich
4f4cc27d73 docs: spelling fixes (#10649)
fix spelling in docs (potential removal of empty spaces as default)

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Arie Heinrich <arie.heinrich@outlook.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
(cherry picked from commit 66d47f23e4)
2026-03-16 11:13:54 +01:00
Roman Khavronenko
074951a4f0 docs: add AI tools section to the docs (#10642)
The new section is placed in root directory and is supposed to promote
information about the following tools:
* MCP servers for Logs, Traces and Metrics
* List of available agentic skills

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
(cherry picked from commit eacdb80ed7)
2026-03-16 11:13:54 +01:00
Roman Khavronenko
f95eb12277 docs: minor wording updates in storage section (#10633)
The change is supposed to make the section easier to understand and to draw
attention to the important things.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
(cherry picked from commit 504cf31dab)
2026-03-16 11:13:54 +01:00
Roman Khavronenko
f308843156 dashboards: add dashboard for exploring stored metrics (#10617)
The new Grafana dashboard uses the following APIs:
- /api/v1/status/tsdb
- /api/v1/status/metric_names_stats

It shows the list of metric names, the request count and the last time
they were "used". Clicking on metric name allows exploring its
cardinality.

Based on https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9832

-----------

The PR contains a few unrelated changes:
* rename of folder for prometheus datasource to remove the duplicated
word
* fix for vmalert's access to the datasource, as before it wasn't able
to write/read properly

-------------

The dashboard screen cast:

https://github.com/user-attachments/assets/01dda5d9-14e5-4f5a-b795-a838abec4f5e

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Haley Wang <haley@victoriametrics.com>
(cherry picked from commit 34d190b32a)
2026-03-16 11:13:53 +01:00
Roshan Banisetti
29b11dc22e app/vmui: show seriesCountByMetricName when label is in focus in Cardinality Explorer (#10638)
### Describe Your Changes

When a label is set as focus label in the Cardinality Explorer, the
"Metric names with the highest number of series" table was hidden. This
change makes it visible alongside the focus label values table.

### How to reproduce

  1. Go to Explore → Cardinality Explorer
2. Enter a selector like `{namespace!=""}` and set Focus label to
`namespace`
  3. Click Execute Query

**Before:** Only "Values for 'namespace' label..." table is shown
**After:** "Metric names with the highest number of series" table is
also shown

<img width="1512" height="723"
alt="b2a8395a1577b31f58ae00f87e29eb87ca98eabfd0b3c0d9185be8f3a9789b5f"
src="https://github.com/user-attachments/assets/50c7f67a-1cfc-40d0-8e99-7750a933ee45"
/>

Fixes #10630

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Roshan1299 <banisettirosh@gmail.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 44fa216bb5)
2026-03-16 11:13:53 +01:00
JAYICE
852a6fa822 dashboard: refine top10 instances by sample panel in vmagent (#10655)
### Describe Your Changes

fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10654

<img width="1995" height="846" alt="image"
src="https://github.com/user-attachments/assets/673afd18-9d64-43d3-9ec2-38508847a851"
/>

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

(cherry picked from commit 4589442345)
2026-03-16 11:13:53 +01:00
Artem Fetishev
68e3caf340 docs: cut release v1.138.0
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-13 16:17:17 +00:00
Artem Fetishev
c622e01391 make docs-update-version
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-13 17:04:09 +01:00
Artem Fetishev
08afaa8858 make vmui-update
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-13 15:48:50 +00:00
Max Kotliar
4dc18fcd57 docs: chore vmauth jwt related documentation
fix tags
add available_from
add cross links
2026-03-13 15:40:55 +02:00
Andrii Chubatiuk
11c7b5f5e5 lib/backup/s3remote: overwrite source tags, while syncing parts from one s3 location to another
In case of conflicting tags while syncing the latest backup with other backup types, s3 keeps the original ones by default. This commit changes the default behaviour so that the original tags are replaced.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics-enterprise/issues/1004
2026-03-13 13:09:45 +01:00
f41gh7
0badd61054 docs/changelog: mention vmbackupmanager bugfix at changelog
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10639
2026-03-13 10:28:32 +01:00
Nikolay
966d1a5dea lib/jwt: support regex value claim matching
This commit adds regex value matching for JWT claims matching.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10584 Fixes
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10628
2026-03-13 10:14:28 +01:00
dependabot[bot]
47d27b69e6 build(deps): bump immutable from 5.1.4 to 5.1.5 in /app/vmui/packages/vmui (#10586)
Bumps [immutable](https://github.com/immutable-js/immutable-js) from
5.1.4 to 5.1.5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/immutable-js/immutable-js/releases">immutable's
releases</a>.</em></p>
<blockquote>
<h2>v5.1.5</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix Improperly Controlled Modification of Object Prototype
Attributes ('Prototype Pollution') in immutable</li>
<li>Upgrade devtools and use immutable version by <a
href="https://github.com/jdeniau"><code>@​jdeniau</code></a> in <a
href="https://redirect.github.com/immutable-js/immutable-js/pull/2158">immutable-js/immutable-js#2158</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/immutable-js/immutable-js/compare/v5.1.4...v5.1.5">https://github.com/immutable-js/immutable-js/compare/v5.1.4...v5.1.5</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/immutable-js/immutable-js/blob/main/CHANGELOG.md">immutable's
changelog</a>.</em></p>
<blockquote>
<h2>5.1.5</h2>
<ul>
<li>Fix Improperly Controlled Modification of Object Prototype
Attributes ('Prototype Pollution') in immutable</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="b37b855686"><code>b37b855</code></a>
5.1.5</li>
<li><a
href="16b3313fdf"><code>16b3313</code></a>
Merge commit from fork</li>
<li><a
href="fd2ef4977e"><code>fd2ef49</code></a>
fix new proto key injection</li>
<li><a
href="6734b7b2af"><code>6734b7b</code></a>
fix Prototype Pollution in mergeDeep, toJS, etc.</li>
<li><a
href="6f772de1e4"><code>6f772de</code></a>
Merge pull request <a
href="https://redirect.github.com/immutable-js/immutable-js/issues/2175">#2175</a>
from immutable-js/dependabot/npm_and_yarn/rollup-4.59.0</li>
<li><a
href="5f3dc61fd0"><code>5f3dc61</code></a>
Bump rollup from 4.34.8 to 4.59.0</li>
<li><a
href="049a594410"><code>049a594</code></a>
Merge pull request <a
href="https://redirect.github.com/immutable-js/immutable-js/issues/2173">#2173</a>
from immutable-js/dependabot/npm_and_yarn/lodash-4.1...</li>
<li><a
href="2481a77331"><code>2481a77</code></a>
Merge pull request <a
href="https://redirect.github.com/immutable-js/immutable-js/issues/2172">#2172</a>
from mrazauskas/update-tstyche</li>
<li><a
href="eb047790b4"><code>eb04779</code></a>
Bump lodash from 4.17.21 to 4.17.23</li>
<li><a
href="b973bf3b62"><code>b973bf3</code></a>
format</li>
<li>Additional commits viewable in <a
href="https://github.com/immutable-js/immutable-js/compare/v5.1.4...v5.1.5">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=immutable&package-manager=npm_and_yarn&previous-version=5.1.4&new-version=5.1.5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/VictoriaMetrics/VictoriaMetrics/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-12 18:09:19 +02:00
dependabot[bot]
9ff1734e0b build(deps): bump rollup from 4.52.5 to 4.59.0 in /app/vmui/packages/vmui (#10556)
Bumps [rollup](https://github.com/rollup/rollup) from 4.52.5 to 4.59.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/rollup/rollup/releases">rollup's
releases</a>.</em></p>
<blockquote>
<h2>v4.59.0</h2>
<h2>4.59.0</h2>
<p><em>2026-02-22</em></p>
<h3>Features</h3>
<ul>
<li>Throw when the generated bundle contains paths that would leave the
output directory (<a
href="https://redirect.github.com/rollup/rollup/issues/6276">#6276</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6275">#6275</a>:
Validate bundle stays within output dir (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<h2>v4.58.0</h2>
<h2>4.58.0</h2>
<p><em>2026-02-20</em></p>
<h3>Features</h3>
<ul>
<li>Also support <code>__NO_SIDE_EFFECTS__</code> annotation before
variable declarations declaring function expressions (<a
href="https://redirect.github.com/rollup/rollup/issues/6272">#6272</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6256">#6256</a>:
docs: document PreRenderedChunk properties including isDynamicEntry and
isImplicitEntry (<a
href="https://github.com/njg7194"><code>@​njg7194</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6259">#6259</a>:
docs: Correct typo and improve sentence structure in docs for
<code>output.experimentalMinChunkSize</code> (<a
href="https://github.com/millerick"><code>@​millerick</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6260">#6260</a>:
fix(deps): update rust crate swc_compiler_base to v47 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6261">#6261</a>:
fix(deps): lock file maintenance minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6262">#6262</a>:
Avoid unnecessary cloning of the code string (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6263">#6263</a>:
fix(deps): update minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6265">#6265</a>:
chore(deps): lock file maintenance (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6267">#6267</a>:
fix(deps): update minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6268">#6268</a>:
chore(deps): update dependency eslint-plugin-unicorn to v63 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6269">#6269</a>:
chore(deps): update dependency lru-cache to v11 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6270">#6270</a>:
chore(deps): lock file maintenance (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6272">#6272</a>:
forward NO_SIDE_EFFECTS annotations to function expressions in variable
declarations (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<h2>v4.57.1</h2>
<h2>4.57.1</h2>
<p><em>2026-01-30</em></p>
<h3>Bug Fixes</h3>
<ul>
<li>Fix heap corruption issue in Windows (<a
href="https://redirect.github.com/rollup/rollup/issues/6251">#6251</a>)</li>
<li>Ensure exports of a dynamic import are fully included when called
from a try...catch (<a
href="https://redirect.github.com/rollup/rollup/issues/6254">#6254</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6251">#6251</a>:
fix: Isolate and cache <code>process.report.getReport()</code> calls in
a child process for robust environment detection (<a
href="https://github.com/alan-agius4"><code>@​alan-agius4</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/rollup/rollup/blob/master/CHANGELOG.md">rollup's
changelog</a>.</em></p>
<blockquote>
<h2>4.59.0</h2>
<p><em>2026-02-22</em></p>
<h3>Features</h3>
<ul>
<li>Throw when the generated bundle contains paths that would leave the
output directory (<a
href="https://redirect.github.com/rollup/rollup/issues/6276">#6276</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6275">#6275</a>:
Validate bundle stays within output dir (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<h2>4.58.0</h2>
<p><em>2026-02-20</em></p>
<h3>Features</h3>
<ul>
<li>Also support <code>__NO_SIDE_EFFECTS__</code> annotation before
variable declarations declaring function expressions (<a
href="https://redirect.github.com/rollup/rollup/issues/6272">#6272</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6256">#6256</a>:
docs: document PreRenderedChunk properties including isDynamicEntry and
isImplicitEntry (<a
href="https://github.com/njg7194"><code>@​njg7194</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6259">#6259</a>:
docs: Correct typo and improve sentence structure in docs for
<code>output.experimentalMinChunkSize</code> (<a
href="https://github.com/millerick"><code>@​millerick</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6260">#6260</a>:
fix(deps): update rust crate swc_compiler_base to v47 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6261">#6261</a>:
fix(deps): lock file maintenance minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6262">#6262</a>:
Avoid unnecessary cloning of the code string (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6263">#6263</a>:
fix(deps): update minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6265">#6265</a>:
chore(deps): lock file maintenance (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6267">#6267</a>:
fix(deps): update minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6268">#6268</a>:
chore(deps): update dependency eslint-plugin-unicorn to v63 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6269">#6269</a>:
chore(deps): update dependency lru-cache to v11 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6270">#6270</a>:
chore(deps): lock file maintenance (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6272">#6272</a>:
forward NO_SIDE_EFFECTS annotations to function expressions in variable
declarations (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<h2>4.57.1</h2>
<p><em>2026-01-30</em></p>
<h3>Bug Fixes</h3>
<ul>
<li>Fix heap corruption issue in Windows (<a
href="https://redirect.github.com/rollup/rollup/issues/6251">#6251</a>)</li>
<li>Ensure exports of a dynamic import are fully included when called
from a try...catch (<a
href="https://redirect.github.com/rollup/rollup/issues/6254">#6254</a>)</li>
</ul>
<h3>Pull Requests</h3>
<ul>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6251">#6251</a>:
fix: Isolate and cache <code>process.report.getReport()</code> calls in
a child process for robust environment detection (<a
href="https://github.com/alan-agius4"><code>@​alan-agius4</code></a>, <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6252">#6252</a>:
chore(deps): update dependency lru-cache to v11 (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot])</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6253">#6253</a>:
chore(deps): lock file maintenance minor/patch updates (<a
href="https://github.com/renovate"><code>@​renovate</code></a>[bot], <a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
<li><a
href="https://redirect.github.com/rollup/rollup/pull/6254">#6254</a>:
Fully include dynamic imports in a try-catch (<a
href="https://github.com/lukastaegert"><code>@​lukastaegert</code></a>)</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="ae846957f1"><code>ae84695</code></a>
4.59.0</li>
<li><a
href="b39616e917"><code>b39616e</code></a>
Update audit-resolve</li>
<li><a
href="c60770d7aa"><code>c60770d</code></a>
Validate bundle stays within output dir (<a
href="https://redirect.github.com/rollup/rollup/issues/6275">#6275</a>)</li>
<li><a
href="33f39c1f20"><code>33f39c1</code></a>
4.58.0</li>
<li><a
href="b61c40803b"><code>b61c408</code></a>
forward NO_SIDE_EFFECTS annotations to function expressions in variable
decla...</li>
<li><a
href="7f00689ec9"><code>7f00689</code></a>
Extend agent instructions</li>
<li><a
href="e7b2b85af0"><code>e7b2b85</code></a>
chore(deps): lock file maintenance (<a
href="https://redirect.github.com/rollup/rollup/issues/6270">#6270</a>)</li>
<li><a
href="2aa5da9baf"><code>2aa5da9</code></a>
fix(deps): update minor/patch updates (<a
href="https://redirect.github.com/rollup/rollup/issues/6267">#6267</a>)</li>
<li><a
href="4319837c54"><code>4319837</code></a>
chore(deps): update dependency lru-cache to v11 (<a
href="https://redirect.github.com/rollup/rollup/issues/6269">#6269</a>)</li>
<li><a
href="c3b6b4bdc4"><code>c3b6b4b</code></a>
chore(deps): update dependency eslint-plugin-unicorn to v63 (<a
href="https://redirect.github.com/rollup/rollup/issues/6268">#6268</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/rollup/rollup/compare/v4.52.5...v4.59.0">compare
view</a></li>
</ul>
</details>
<details>
<summary>Install script changes</summary>
<p>This version modifies <code>prepare</code> script that runs during
installation. Review the package contents before updating.</p>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rollup&package-manager=npm_and_yarn&previous-version=4.52.5&new-version=4.59.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/VictoriaMetrics/VictoriaMetrics/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-12 18:09:19 +02:00
dependabot[bot]
0c86073a74 build(deps): bump minimatch in /app/vmui/packages/vmui (#10555)
Bumps [minimatch](https://github.com/isaacs/minimatch). These
dependencies needed to be updated together.
Updates `minimatch` from 3.1.2 to 3.1.5
<details>
<summary>Commits</summary>
<ul>
<li><a
href="7bba97888a"><code>7bba978</code></a>
3.1.5</li>
<li><a
href="bd259425b2"><code>bd25942</code></a>
docs: add warning about ReDoS</li>
<li><a
href="1a9c27c757"><code>1a9c27c</code></a>
fix partial matching of globstar patterns</li>
<li><a
href="1a2e084af5"><code>1a2e084</code></a>
3.1.4</li>
<li><a
href="ae24656237"><code>ae24656</code></a>
update lockfile</li>
<li><a
href="b100374922"><code>b100374</code></a>
limit recursion for **, improve perf considerably</li>
<li><a
href="26ffeaa091"><code>26ffeaa</code></a>
lockfile update</li>
<li><a
href="9eca892a4e"><code>9eca892</code></a>
lock node version to 14</li>
<li><a
href="00c323b188"><code>00c323b</code></a>
3.1.3</li>
<li><a
href="30486b2048"><code>30486b2</code></a>
update CI matrix and actions</li>
<li>Additional commits viewable in <a
href="https://github.com/isaacs/minimatch/compare/v3.1.2...v3.1.5">compare
view</a></li>
</ul>
</details>
<br />

Updates `minimatch` from 9.0.5 to 9.0.9
<details>
<summary>Commits</summary>
<ul>
<li><a
href="7bba97888a"><code>7bba978</code></a>
3.1.5</li>
<li><a
href="bd259425b2"><code>bd25942</code></a>
docs: add warning about ReDoS</li>
<li><a
href="1a9c27c757"><code>1a9c27c</code></a>
fix partial matching of globstar patterns</li>
<li><a
href="1a2e084af5"><code>1a2e084</code></a>
3.1.4</li>
<li><a
href="ae24656237"><code>ae24656</code></a>
update lockfile</li>
<li><a
href="b100374922"><code>b100374</code></a>
limit recursion for **, improve perf considerably</li>
<li><a
href="26ffeaa091"><code>26ffeaa</code></a>
lockfile update</li>
<li><a
href="9eca892a4e"><code>9eca892</code></a>
lock node version to 14</li>
<li><a
href="00c323b188"><code>00c323b</code></a>
3.1.3</li>
<li><a
href="30486b2048"><code>30486b2</code></a>
update CI matrix and actions</li>
<li>Additional commits viewable in <a
href="https://github.com/isaacs/minimatch/compare/v3.1.2...v3.1.5">compare
view</a></li>
</ul>
</details>
<br />


Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/VictoriaMetrics/VictoriaMetrics/network/alerts).

</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-12 18:09:19 +02:00
dependabot[bot]
f432fa476f build(deps): bump crazy-max/ghaction-import-gpg from 6 to 7 (#10572)
Bumps
[crazy-max/ghaction-import-gpg](https://github.com/crazy-max/ghaction-import-gpg)
from 6 to 7.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/crazy-max/ghaction-import-gpg/releases">crazy-max/ghaction-import-gpg's
releases</a>.</em></p>
<blockquote>
<h2>v7.0.0</h2>
<ul>
<li>Node 24 as default runtime (requires <a
href="https://github.com/actions/runner/releases/tag/v2.327.1">Actions
Runner v2.327.1</a> or later) by <a
href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/241">crazy-max/ghaction-import-gpg#241</a></li>
<li>Switch to ESM and update config/test wiring by <a
href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/239">crazy-max/ghaction-import-gpg#239</a></li>
<li>Bump <code>@​actions/core</code> from 1.11.1 to 3.0.0 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/232">crazy-max/ghaction-import-gpg#232</a></li>
<li>Bump <code>@​actions/exec</code> from 1.1.1 to 3.0.0 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/242">crazy-max/ghaction-import-gpg#242</a></li>
<li>Bump brace-expansion from 1.1.11 to 1.1.12 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/221">crazy-max/ghaction-import-gpg#221</a></li>
<li>Bump minimatch from 3.1.2 to 3.1.5 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/240">crazy-max/ghaction-import-gpg#240</a></li>
<li>Bump openpgp from 6.1.0 to 6.3.0 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/233">crazy-max/ghaction-import-gpg#233</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/crazy-max/ghaction-import-gpg/compare/v6.3.0...v7.0.0">https://github.com/crazy-max/ghaction-import-gpg/compare/v6.3.0...v7.0.0</a></p>
<h2>v6.3.0</h2>
<ul>
<li>Bump openpgp from 5.11.2 to 6.1.0 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/215">crazy-max/ghaction-import-gpg#215</a></li>
<li>Bump cross-spawn from 7.0.3 to 7.0.6 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/212">crazy-max/ghaction-import-gpg#212</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/crazy-max/ghaction-import-gpg/compare/v6.2.0...v6.3.0">https://github.com/crazy-max/ghaction-import-gpg/compare/v6.2.0...v6.3.0</a></p>
<h2>v6.2.0</h2>
<ul>
<li>Bump <code>@​actions/core</code> from 1.10.1 to 1.11.1 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/209">crazy-max/ghaction-import-gpg#209</a></li>
<li>Bump braces from 3.0.2 to 3.0.3 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/203">crazy-max/ghaction-import-gpg#203</a></li>
<li>Bump ip from 2.0.0 to 2.0.1 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/196">crazy-max/ghaction-import-gpg#196</a></li>
<li>Bump micromatch from 4.0.4 to 4.0.8 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/207">crazy-max/ghaction-import-gpg#207</a></li>
<li>Bump openpgp from 5.11.0 to 5.11.2 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/205">crazy-max/ghaction-import-gpg#205</a></li>
<li>Bump tar from 6.1.14 to 6.2.1 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/198">crazy-max/ghaction-import-gpg#198</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/crazy-max/ghaction-import-gpg/compare/v6.1.0...v6.2.0">https://github.com/crazy-max/ghaction-import-gpg/compare/v6.1.0...v6.2.0</a></p>
<h2>v6.1.0</h2>
<ul>
<li>Bump <code>@​actions/core</code> from 1.10.0 to 1.10.1 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/186">crazy-max/ghaction-import-gpg#186</a></li>
<li>Bump <code>@​babel/traverse</code> from 7.17.3 to 7.23.2 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/191">crazy-max/ghaction-import-gpg#191</a></li>
<li>Bump debug from 4.1.1 to 4.3.4 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/190">crazy-max/ghaction-import-gpg#190</a></li>
<li>Bump openpgp from 5.10.1 to 5.11.0 in <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/pull/192">crazy-max/ghaction-import-gpg#192</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/crazy-max/ghaction-import-gpg/compare/v6.0.0...v6.1.0">https://github.com/crazy-max/ghaction-import-gpg/compare/v6.0.0...v6.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="2dc316deee"><code>2dc316d</code></a>
Merge pull request <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/issues/242">#242</a>
from crazy-max/dependabot/npm_and_yarn/actions/exec-3...</li>
<li><a
href="5812792d2b"><code>5812792</code></a>
chore: update generated content</li>
<li><a
href="ceb906ede8"><code>ceb906e</code></a>
build(deps): bump <code>@​actions/exec</code> from 1.1.1 to 3.0.0</li>
<li><a
href="a9dffd9307"><code>a9dffd9</code></a>
Merge pull request <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/issues/241">#241</a>
from crazy-max/node24</li>
<li><a
href="36d49fcb3c"><code>36d49fc</code></a>
node 24 as default runtime</li>
<li><a
href="50c4e4f047"><code>50c4e4f</code></a>
Merge pull request <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/issues/233">#233</a>
from crazy-max/dependabot/npm_and_yarn/openpgp-6.3.0</li>
<li><a
href="c78fe49862"><code>c78fe49</code></a>
chore: update generated content</li>
<li><a
href="8dbbb1e8e5"><code>8dbbb1e</code></a>
Merge pull request <a
href="https://redirect.github.com/crazy-max/ghaction-import-gpg/issues/221">#221</a>
from crazy-max/dependabot/npm_and_yarn/brace-expansio...</li>
<li><a
href="fc715b05fd"><code>fc715b0</code></a>
build(deps): bump openpgp from 6.1.0 to 6.3.0</li>
<li><a
href="99469162d0"><code>9946916</code></a>
build(deps): bump brace-expansion from 1.1.11 to 1.1.12</li>
<li>Additional commits viewable in <a
href="https://github.com/crazy-max/ghaction-import-gpg/compare/v6...v7">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=crazy-max/ghaction-import-gpg&package-manager=github_actions&previous-version=6&new-version=7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-12 17:22:18 +02:00
Max Kotliar
11fbaa0fd2 lib/jwt: mark deprecated properties needed only for vmgateway 2026-03-12 16:00:42 +02:00
Max Kotliar
f2a1f74c1b docs: add guides for vmauth jwt authentication (#10129)
### Describe Your Changes

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9439

Commit adds two guides:
- The first sets up Keycloak, vmcluster, vmauth, and Grafana, and demonstrates
how to log in to Grafana using OIDC and use the JWT token to limit the metrics
fetched by the Grafana datasource from vmcluster.
- The second demonstrates how to configure vmagent so that it obtains a JWT
token and uses it during remote write requests.

To see the guides locally, check out the branch, run `make docs-debug`,
and open `http://localhost:1313` in a browser.

vmauth JWT-related PRs should be merged into the
[vmauth-jwt](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/vmauth-jwt)
branch and, when everything is ready, merged into master.

Debug notes for the guides:
https://github.com/VictoriaMetrics/debug-notes/tree/main/guides/vmauth-jwt

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Pablo Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-12 15:49:03 +02:00
f41gh7
30afa6b11c vendor: update metrics package
Related to https://github.com/VictoriaMetrics/metrics/issues/85
2026-03-12 09:42:28 +01:00
Max Kotliar
78c0d64b6b lib/encoding: fix integer overflow in UnmarshalBytes (#10629)
Poison varint: MaxUint64 encoded as varint (0xFFFFFFFFFFFFFFFF). 
The bounds check uint64(nSize)+n overflows to 9, bypassing the guard. 
Then int(MaxUint64)=-1 makes src[10:9] which panics.
2026-03-11 12:12:19 +01:00
Max Kotliar
b07c67a9f6 lib/jwt: Verifier support jwks kid (#10611)
### Describe Your Changes

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10606

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Nikolay <nik@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-11 00:20:23 +02:00
Nikolay
9a632170b2 app/vmauth: remove data-race at default_url proxy
Previously there was a data-race, when targetURL was concurrently
 updated in case of default url route.

 This commit fixes data-race and adds concurrency to the routing tests.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10626
2026-03-10 21:07:30 +01:00
Fred Navruzov
c69fb4d337 docs/vmanomaly - release v1.29.0 (#10620)
### Describe Your Changes

Documentation updates following `v1.29.0` release of `vmanomaly`

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-03-10 19:42:21 +02:00
Roman Khavronenko
8b963f207b docs/vmalert: add more clarification on config reload procedure 2026-03-10 12:57:04 +01:00
Roman Khavronenko
bb48ab5b67 app/vmauth: add request duration to access log
Request duration could be useful for tracking access logs too. For
example, track referrers for all slow requests.

While there, added tests to track log structure changes.

Related to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5936
2026-03-10 12:57:03 +01:00
andriibeee
64d9d9af2a lib/promauth: support headers in oauth2 token_url requests
The OAuth2 token source lib doesn't allow defining request headers explicitly.
This commit adds a custom transport to mitigate it. The new transport modifies the http.Request by making a shallow copy of it and setting additional headers.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8939
2026-03-10 10:11:33 +01:00
JAYICE
cece53334e lib/filestream: properly account vm_filestream_write_duration_seconds_total metric
Previously, vm_filestream_write_duration_seconds_total was increased in two places:
* statWriter.Write()
* Writer.MustFlush(), which eventually calls statWriter.Write(), hence double-counting vm_filestream_write_duration_seconds_total

For reference, vm_filestream_read_duration_seconds_total is increased only in statReader.Read to track the read syscall.

 This commit removes latency tracking from MustFlush method.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10564
2026-03-10 10:11:32 +01:00
f41gh7
bbe507d7ee app/vmauth: add match_claims JWT routing
This commit adds claims matching for jwt token auth.

It allows matching against any JWT token JSON field, with nested traversal.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10584
2026-03-10 10:51:06 +02:00
Max Kotliar
7320c26cb5 docs/changelog: port v1.136.1 changelog to master 2026-03-09 20:33:24 +02:00
Yury Moladau
a2439464ec app/vmui: rename debug tools buttons for clarity
Replace ambiguous button labels such as "Submit" and "Apply" with
clearer wording to indicate that these actions only preview results and
do not modify the deployment configuration.

Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10453
2026-03-09 14:31:15 +01:00
Max Kotliar
601f10c0bb app/vmauth: Implement OpenID Connect Discovery support
Add support for [OpenID Connect
Discovery](https://openid.net/specs/openid-connect-discovery-1_0.html#IANA)
as an alternative way to obtain verification keys and rotate them
automatically.

`jwt` configuration should allow **exactly one** of the following
verification modes: `public_keys`, `oidc`, `skip_verify`. These options
must be mutually exclusive.

Example: OIDC configuration

```yaml
users:
- jwt:
    oidc:
      issuer: http://identity-provider.com
```

When `oidc` is enabled:

1. On startup, `vmauth` fetches:

   ```
   {issuer}/.well-known/openid-configuration
   ```
2. Extracts `jwks_uri`.
3. Fetches [JWK
keys](https://openid.net/specs/draft-jones-json-web-key-03.html#ExampleJWK)
from `jwks_uri`.
4. Uses discovered keys to verify JWT tokens.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10585

Failure handling:
* If discovery fails at startup:
  * No keys are available.
  * The user is skipped.
* Discovery runs periodically in background (e.g., every 1 minute).
* If keys become available later, authentication should start working
automatically.
* If keys were previously fetched and the identity provider becomes
unavailable:
  * Cached keys must be preserved.
  * Authentication continues using cached keys.

#### JWT Requirements in OIDC Mode

When `oidc` is enabled:

* `iss` claim becomes
[mandatory](https://openid.net/specs/openid-connect-core-1_0.html#IDToken).
* `iss` [must
match](https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys):
  * `oidc.issuer` from config.
  * `issuer` returned in the OpenID configuration document.
* JWT header must contain `kid`.
* `kid` must be used to select the appropriate key from JWKS.
* Tokens without `kid` must be rejected.
* Tokens without `iss` must be rejected.

Rationale
* Enables automatic key rotation.
* Eliminates manual public key configuration.
* Maintains compatibility with standard OIDC providers.

---------

Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-09 14:31:14 +01:00
Ihar Statkevich
2a1b1d4282 vmui: use increase_pure instead of rate for histogram heatmaps
- VMUI Explore Metrics uses `rate` for histogram bucket queries, which
skips the first observation
in each bucket because `rate` requires two data points to calculate a
per-second rate.
- Replace `rate` with `increase_pure`, which assumes counters start from
0 and correctly shows
the first observation when a new bucket appears.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10365
2026-03-09 11:46:37 +01:00
Artem Fetishev
92763f750a lib/storage: remove 1 cpu special case from storage tests
The test should not fail now on systems with 1 cpu because partition
indexDBs are not rotated. See #8948.

Also removed two TODOs from the test to keep it simple.
2026-03-09 11:46:37 +01:00
Nikolay
1860f659d6 app/vmauth: reduce memory allocations for JWT token parsing
This commit adds in-memory pool for jwt tokens. It reduces memory
 allocations and GC pressure.

 Benchmark results:
```
                                         │ before_optimisation.txt │       after_optimisation.txt        │
                                         │         sec/op          │   sec/op     vs base                │
JWTRequestHandler/full_template-10                     65.82µ ± 2%   26.87µ ± 2%  -59.18% (p=0.000 n=10)
JWTRequestHandler/token_without_claim-10               734.4n ± 1%   543.9n ± 0%  -25.94% (p=0.000 n=10)
JWTRequestHandler/expired_token-10                    1560.0n ± 0%   681.2n ± 1%  -56.33% (p=0.000 n=10)
geomean                                                4.225µ        2.151µ       -49.08%

                                         │ before_optimisation.txt │        after_optimisation.txt        │
                                         │          B/op           │     B/op      vs base                │
JWTRequestHandler/full_template-10                    33.60Ki ± 0%   16.52Ki ± 0%  -50.85% (p=0.000 n=10)
JWTRequestHandler/token_without_claim-10              1.605Ki ± 0%   1.105Ki ± 0%  -31.14% (p=0.000 n=10)
JWTRequestHandler/expired_token-10                    3.267Ki ± 0%   1.045Ki ± 0%  -68.01% (p=0.000 n=10)
geomean                                               5.606Ki        2.672Ki       -52.34%

                                         │ before_optimisation.txt │       after_optimisation.txt       │
                                         │        allocs/op        │ allocs/op   vs base                │
JWTRequestHandler/full_template-10                      224.0 ± 0%   172.0 ± 0%  -23.21% (p=0.000 n=10)
JWTRequestHandler/token_without_claim-10                17.00 ± 0%   13.00 ± 0%  -23.53% (p=0.000 n=10)
JWTRequestHandler/expired_token-10                      30.00 ± 0%   11.00 ± 0%  -63.33% (p=0.000 n=10)
geomean                                                 48.52        29.08       -40.06%
```

follow-up for f8a101e45e

related issue
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10492
2026-03-09 11:43:20 +01:00
f41gh7
012a269f78 deployment/docker: update Go builder from Go1.26.0 to Go1.26.1
See https://github.com/golang/go/issues?q=milestone%3AGo1.26.1%20label%3ACherryPickApproved
2026-03-09 11:39:59 +01:00
f41gh7
c2a1a6f55e lib/httpserver: fixes tests after 686c9a21ff 2026-03-05 16:13:40 +01:00
andriibeee
90ad81ab62 lib/httpserver: handle preflight HTTP requests properly
Previously OPTIONS HTTP requests for CORS preflight checks would trigger
the original request handler. This pull request fixes that behavior to
align with https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5563
2026-03-05 16:00:26 +01:00
Hui Wang
b2b52f01cd docs: polish opentelemetry integration doc 2026-03-05 16:00:25 +01:00
Artem Fetishev
b4852d97d0 app/vmselect: Disable Graphite Tag Series HTTP endpoints (#10579)
Disabling is done by making the handlers for `/tags/tagSeries` and
`/tags/tagMultiSeries` return a `501 (Not Implemented)` status code
along with an error message saying that the API has been disabled and
will be removed in the future.

See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10544.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-05 14:36:39 +01:00
Artem Fetishev
aa5283495c docs: Update docs to reflect partition index changes (#10582)
Now that indexDB is per-partition, the indexDB-related docs need to be
updated. Specifically, how the indexDB is cleaned up when it falls
outside the `-retentionPeriod`.

Follow-up for #8134.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
Signed-off-by: Aliaksandr Valialkin <valyala@gmail.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-04 18:49:23 +01:00
Nikolay
54a6e35936 lib/jwt: remove memory allocation from token parsing
This commit adds `Reset()` method to the Token struct.
It allows to re-use `Token` object, which reduces memory allocations
needed for parsing `Token` and CPU pressure on GarbageCollector.

 Additionally, it adds a fastjson parser, which allows efficiently performing
 claims matching based on dynamic value input.

 Benchmark stats:

```
                                         │ profiles/jwt_parse_before.txt │    profiles/jwt_parse_after.txt     │
                                         │            sec/op             │   sec/op     vs base                │
TokenParse/simple-10                                       3375.0n ± 41%   335.6n ± 4%  -90.05% (p=0.000 n=10)
TokenParse/gateway_labels_and_filters-10                   4259.0n ±  6%   423.3n ± 5%  -90.06% (p=0.000 n=10)
TokenParse/scope_as_slice_string-10                        3781.5n ±  2%   374.7n ± 5%  -90.09% (p=0.000 n=10)
TokenParse/access_claim_string-10                          2974.5n ±  1%   290.9n ± 4%  -90.22% (p=0.000 n=10)
TokenParse/vmauth_related_fields-10                        4340.5n ±  2%   389.2n ± 2%  -91.03% (p=0.000 n=10)
geomean                                                     3.709µ         359.8n       -90.30%

                                         │ profiles/jwt_parse_before.txt │       profiles/jwt_parse_after.txt        │
                                         │             B/op              │     B/op      vs base                     │
TokenParse/simple-10                                        5.195Ki ± 0%   0.000Ki ± 0%  -100.00% (p=0.000 n=10)
TokenParse/gateway_labels_and_filters-10                    6312.00 ± 0%     16.00 ± 0%   -99.75% (p=0.000 n=10)
TokenParse/scope_as_slice_string-10                         6312.00 ± 0%     16.00 ± 0%   -99.75% (p=0.000 n=10)
TokenParse/access_claim_string-10                           4.789Ki ± 0%   0.000Ki ± 0%  -100.00% (p=0.000 n=10)
TokenParse/vmauth_related_fields-10                         6.327Ki ± 0%   0.000Ki ± 0%  -100.00% (p=0.000 n=10)
geomean                                                     5.693Ki                      ?                       ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean

                                         │ profiles/jwt_parse_before.txt │      profiles/jwt_parse_after.txt       │
                                         │           allocs/op           │ allocs/op   vs base                     │
TokenParse/simple-10                                          39.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=10)
TokenParse/gateway_labels_and_filters-10                     53.000 ± 0%   1.000 ± 0%   -98.11% (p=0.000 n=10)
TokenParse/scope_as_slice_string-10                          54.000 ± 0%   1.000 ± 0%   -98.15% (p=0.000 n=10)
TokenParse/access_claim_string-10                             41.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=10)
TokenParse/vmauth_related_fields-10                           57.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=10)
geomean                                                       48.23                    ?                       ¹ ²
```

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10492
2026-03-04 17:32:15 +01:00
Max Kotliar
bf67bc2275 .github: remove copilot instruction since we use cubic AI for code review
Copilot results were far from good, so we switched to Cubic AI.
2026-03-04 14:37:20 +02:00
Artem Fetishev
1c7a2761f5 lib/storage: add an apptest for Graphite tag registration (#10558)
Add an apptest for `/graphite/tags/tagSeries` and `/graphite/tags/tagMultiSeries` URLs path to test the time series registration in the index. This PR is a preparation for disabling these paths (#10544). For now just testing that they actually work as described in https://graphite.readthedocs.io/en/stable/tags.html#adding-series-to-the-tagdb.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-03-04 07:46:46 +01:00
Hui Wang
66ec4f5207 app/vmalert: support negative values for the group eval_offset option
There are following main use cases for `eval_offset`:
1. To ensure rules are evaluated at an exact offset, so the results have
the exact timestamp the user wants.
2. The source data for a certain rule is delivered at a specific time
point, so rules need to be executed after that time point to get correct
results. For example, [chaining
groups](https://docs.victoriametrics.com/victoriametrics/vmalert/#chaining-groups).
3. A group contains some heavy rules that can take a few minutes to
finish. To guarantee a single evaluation can complete in time and not
delay the next run, the user may want to schedule the group to be
executed within [intervalStart, intervalEnd-avgTotalEvaluationDuration].

Negative value can be convenient for case3, as users only need to set
group `eval_offset: -avgTotalEvaluationDuration(a bigger value than the
real duration to leave some buffer would be better)`.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10424
2026-03-03 12:07:25 +01:00
Hui Wang
dfd73d0f4f dashboard: fix expressions in vmauth memory usage panel (#10574)
vmauth doesn’t use fastcache or expose `vm_cache_size_bytes`, so having
`vm_cache_size_bytes` makes the expression evaluate to null.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10574/
2026-03-03 12:07:25 +01:00
hklhai
e3ecbf622f app/{vmagent,vminsert}: properly attach host label for datadog-sketches
Due to a bug introduced in the initial datadog-sketches API implementation, the `host` label was incorrectly obtained from the `Tags` structure, while it is actually present directly at the root of the protobuf message.

 This commit properly attaches `host` label in such case.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
2026-03-03 12:07:25 +01:00
Max Kotliar
cc35604fb9 docs/changelog: sync lts changelogs 2026-03-02 20:20:50 +02:00
Max Kotliar
14a5ad2327 docs: bump version to v1.137.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-02 16:12:01 +02:00
Max Kotliar
c60b9da806 deployment/docker: bump version to v1.137.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-03-02 16:05:56 +02:00
Hui Wang
8501b48552 vmalert: prevent subsequent small remote write requests if the previous one takes too long
If the data flush to the remote write destination takes longer than the
periodic flush interval (default 2s), the ticker channel will contain a
stale tick, causing the ticker case to be selected too early with an
empty or small amount of data inside `wr`, resulting in a wasted remote
write request with one or two time series(if `ts, ok := <-c.input` was
also randomly selected beforehand).

We could also consider resetting the ticker after draining the stale tick
to ensure `wr` always accumulates data for the full flush interval, but
that seems more trivial to me.
2026-03-02 11:28:39 +01:00
Zakhar Bessarab
de98ae8a6e lib/backup/actions: do not set s3ACL by default
Disable ACL default configuration as ACL is not always supported by
S3-compatible storages (for example, linode does not support it in some
regions). So it requires users to disable it manually to make it work.
Moreover, it is not a recommended way of objects access configuration
anymore as ACLs for buckets are disabled by default. Currently, it is
recommended to use policies for access controls. See -
https://docs.aws.amazon.com/AmazonS3/latest/userguide/about-object-ownership.html

Fixes: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10539
2026-03-02 11:28:39 +01:00
Roman Khavronenko
d104e21343 docs: add availability mark for access_log feature in vmauth (#10567)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-03-02 11:28:38 +01:00
Roman Khavronenko
0661207f6f app/vmauth: support printing access logs per user
Add new option per-user to print access logs. Such logs
contain limited amount of information to prevent exposing
sensitive data.

Access logs can be enabled/disabled via hot-reload and could
help locating clients that incorrectly use or abuse vmauth.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5936
2026-03-02 11:10:45 +01:00
Artem Fetishev
099346b511 apptest: Fix flaky tests
Cluster apptests failed from time to time with the following error:

```
timed out while waiting for inserted rows to be sent to vmstorage
cluster
```

due to incorrect calculation of inserted row count before and after
insertion. This PR fixes it by putting the "before" count calculation
before the send() operation.
2026-03-02 10:41:52 +01:00
Max Kotliar
bf50dcecfb docs/changelog: fix link 2026-02-27 20:02:01 +02:00
Max Kotliar
37c08a3b70 docs: cut release v1.137.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-27 19:57:07 +02:00
Max Kotliar
fa58ded4c7 docs: update version to v1.137.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-27 19:19:08 +02:00
Max Kotliar
3440c51f33 app/vmselect: run make vmui-update
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-27 18:54:16 +02:00
Max Kotliar
63fb0c98fb go.mod: fix govulncheck
govulncheck ./...
=== Symbol Results ===

Vulnerability #1: GO-2026-4559
    Sending certain HTTP/2 frames can cause a server to panic in
    golang.org/x/net
  More info: https://pkg.go.dev/vuln/GO-2026-4559
  Module: golang.org/x/net
    Found in: golang.org/x/net@v0.50.0
    Fixed in: golang.org/x/net@v0.51.0
2026-02-27 14:46:17 +02:00
Hui Wang
bb0c60fb8f vmselect: revert rollup result cache for instant queries that contain rate function (#10553)
See reason in
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084
2026-02-27 14:38:19 +02:00
Nikolay
f8d9ab6588 lib/promauth: check client certificate rotation during requests
Previously, the client certificate was only refreshed during the TLS
handshake, which occurs when establishing a new connection. This meant
the remote HTTP server had to close the existing connection for the
client to pick up an updated (e.g. expired) certificate. As a
workaround, connection keep-alive could be disabled, but that
significantly increased request latency.

This commit adds a certificate check during HTTP RoundTrip. If the
client certificate has changed, the RoundTripper recreates the transport
and its connection pool. This behavior is already implemented for CA
certificate changes.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10393
2026-02-27 13:21:56 +01:00
Max Kotliar
e5bba92f78 docs/changelog: add update note for multitenant api endpoint 2026-02-27 13:46:00 +02:00
Artem Fetishev
32c1854d0e lib/storage: rename cache methods to match unified format (#10534)
Per @valyala's request, rename storage cache methods to adhere to the
following format:

```
get[Value]By[Key]FromCache
put[Value]By[Key]ToCache
```

Also move `s.metricIDCache` methods from `indexDB` to `Storage` because
this cache exists at the `Storage` level.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-27 10:43:01 +01:00
John Allberg
85a7fbf099 publish SPDX SBOM attestations for container images (#10474)
Enable BuildKit-native SPDX SBOM and provenance attestations by setting
`--sbom=true --provenance=true` in `docker buildx build` within
`publish-via-docker`.

- Set `--provenance=true --sbom=true` in `publish-via-docker` for both
Alpine and scratch variants
- Add SBOM section to SECURITY.md with inspection and Trivy scan
instructions
- Update Release-Guide.md
- Add changelog entry

Verified end-to-end: pushed test image to GHCR, confirmed SBOM
attestation via `docker buildx imagetools inspect`, and Trivy scan via
`trivy image --sbom-sources oci` succeeded (with 0 vulnerabilities :-)).

Fixes #10473 

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: John Allberg <john@ayoy.se>
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-27 10:50:38 +02:00
Hui Wang
865ce73601 docs: add a note for vmctl remote read stream mode (#10548)
Samples in Mimir (or Prometheus) are stored in chunks, which are
compressed efficiently using algorithms rather than being stored as
independent samples, see details in [this
article](https://prometheus.io/blog/2019/10/10/remote-read-meets-streaming/)
and [this talk](https://www.youtube.com/watch?v=b_pEevMAC3I).
When using a small `--remote-read-step-interval`, particularly `minute`,
a single chunk may contain samples that exceed the requested time
window, and all the returned chunks contain overlapping samples.
Consequently, vmctl will read and migrate many duplicate samples into
VictoriaMetrics.

In tests, `--remote-read-step-interval=minute
--remote-read-use-stream=true` with raw sample `scrape_interval: 10s`
and remote read time range of 24h can write ~20x duplication.
But I assume the minute interval is rarely used with a large time range
and duplicates are fine in VictoriaMetrics due to deduplication, so we
don't need to disallow using it.
```
## --remote-read-step-interval=minute --remote-read-use-stream=false
## total samples: **15696611(the real number)**
2026/02/26 22:10:25 VictoriaMetrics importer stats:
  idle duration: 50.080851955s;
  time spent while importing: 32.108903417s;
  total samples: 15696611;
  samples/s: 488855.41;
  total bytes: 735.8 MB;
  bytes/s: 22.9 MB;
  import requests: 79;
  import requests retries: 0;
2026/02/26 22:10:25 Total time: 32.112912208s

## --remote-read-step-interval=day --remote-read-use-stream=true
## total samples: 15878869
2026/02/26 22:20:37 VictoriaMetrics importer stats:
  idle duration: 960.698874ms;
  time spent while importing: 6.338309625s;
  total samples: 15878869;
  samples/s: 2505221.41;
  total bytes: 278.6 MB;
  bytes/s: 44.0 MB;
  import requests: 80;
  import requests retries: 0;
2026/02/26 22:20:37 Total time: 6.340023167s

## --remote-read-step-interval=hour --remote-read-use-stream=true
## total samples: 21824000
2026/02/26 22:13:14 VictoriaMetrics importer stats:
  idle duration: 5.238827666s;
  time spent while importing: 7.274528s;
  total samples: 21824000;
  samples/s: 3000057.19;
  total bytes: 394.4 MB;
  bytes/s: 54.2 MB;
  import requests: 110;
  import requests retries: 0;
2026/02/26 22:13:14 Total time: 7.278895084s

## --remote-read-step-interval=minute --remote-read-use-stream=true
## total samples: **353800724(353800724/15696611~22.5)**
2026/02/26 22:18:41 VictoriaMetrics importer stats:
  idle duration: 1m45.09105431s;
  time spent while importing: 1m51.716730125s;
  total samples: 353800724;
  samples/s: 3166944.86;
  total bytes: 6.8 GB;
  bytes/s: 61.3 MB;
  import requests: 1769;
  import requests retries: 0;
2026/02/26 22:18:41 Total time: 1m51.721834958s
```
2026-02-27 10:46:08 +02:00
Roman Khavronenko
b230851a00 dashboards: remove $instance from drilldown link (#10518)
For unknown reason, $instance variable can't be passed unescaped via
dashboard link. In result, clicking on the line on panel opens a new tab
where panel fails to render.

This happens when `$instance=$__all`. The rendered link becomes
`&var-instance=.*` which then gets double-escaped in the query and
yields no result. This behavior can be verified at
https://play-grafana.victoriametrics.com/.

I've tried to properly unescape the variable using
https://grafana.com/docs/grafana/latest/visualizations/dashboards/variables/variable-syntax
but found no solution.

Hence, proposing to remove this filter from drilldown.

------------



https://github.com/user-attachments/assets/faf76d63-7739-48d7-8ce6-3d567e77003c

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-02-27 10:42:01 +02:00
Max Kotliar
402164cd3a dashboards: add job\instance filters to alerts statistics dashboard (#10549)
### Describe Your Changes

Add `job` and `instance` filters to the `VictoriaMetrics - Alert
statistics` dashboard. This allows users running multiple independent
[vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/)
instances to filter and analyze alerts statistics per specific instance,
making it easier to identify issues in a particular vmalert deployment.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-27 09:41:46 +02:00
Max Kotliar
2301e754e4 app/vmauth: userinfo returns jwt as name (#10546)
### Describe Your Changes

Previously it would return empty string if jwt auth method is
configured. The empty string complicates reading logs.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-26 16:57:33 +02:00
Max Kotliar
84de17126f docs: reorganize OpenTelemetry documentation into integrations and data-ingestion (#10520)
### Describe Your Changes

Move OpenTelemetry-related documentation under docs/integrations and
docs/data-ingestion to establish a clear, scalable structure.

As OpenTelemetry support expands, we need a dedicated place to document
protocol details, implementation specifics, and known limitations, such
as:

- Delta temporality not working with downsampling. See
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10014#issuecomment-3697509266.
- Negative histogram buckets being discarded by VictoriaMetrics. See
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9896.

The new structure separates concerns:

- `docs/integrations/` — protocol overview, implementation details, and
limitations.
- `docs/data-ingestion/` — OpenTelemetry Collector configuration and
ingestion setup.

This aligns OpenTelemetry documentation with the existing structure used
across other integrations and ingestion methods.

New pages and links preserve backward compatibility.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-26 16:56:15 +02:00
Max Kotliar
3c26f9e7f9 docs/changelog: chore changelog 2026-02-26 14:53:00 +02:00
Roman Khavronenko
15b6f725f4 Docs: add integration with bindplace (#10543)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-02-26 14:40:55 +02:00
Max Kotliar
d6358b0eed docs: refine vmauth jwt documentation 2026-02-26 14:39:06 +02:00
Pablo (Tomas) Fernandez
7af52ab24b Docs: Update guide "Kubernetes monitoring with VictoriaMetrics Cluster" (#10410)
### Describe Your Changes

- Updated GKE version to a more current 1.34+
- Updated guide to more modern Helm and Kubectl versions
- Tested updated instructions on GKE 1.34.1-gke.3971001 (and a local k3s
instance) successfully
- Removed revision from Grafana values for helm chart (confirmed it
pulls the latest revision)
- Split the helm chart values (`guide-vmcluster-vmagent-values.yaml`)
into more readable chunks and added explanations next to each chunk
- Added and updated expected outputs. Some were missing and others were
outdated
- Updated Grafana dashboards screenshots since they changed from the
last revision
- Updated Grafana repo to use community org (old grafana chart was
deprecated
on Jan 30th -
[source](https://community.grafana.com/t/helm-repository-migration-grafana-community-charts/160983))
- Minor corrections and typo fixes. Improved flow
- Added a section at the end pointing readers where they can go next.

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Vadim Rutkovsky <vadim@vrutkovs.eu>
2026-02-26 14:08:52 +02:00
hagen1778
a859da548e docs: update best recommendations for swap
* simplify wording
* add link to Grafana dashboards where they're mentioned

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 570a9ef627)
2026-02-26 11:43:29 +01:00
Maxime Grenu
f3f32841f0 docs/vmctl: fix invalid MetricsQL numeric literal in monitoring example (#10494)
## Summary

Fix an invalid MetricsQL numeric literal in the vmctl monitoring
documentation.

## Problem

The PromQL/MetricsQL example query for monitoring vm-native migration
data transfer speed used `1Mb` as a divisor:

```promql
rate(vmctl_vm_native_migration_bytes_transferred_total[5m]) / 1Mb
```

However, `Mb` is **not** a valid MetricsQL numeric suffix. According to
the [MetricsQL
documentation](https://docs.victoriametrics.com/victoriametrics/metricsql/#numeric-values):

> Numeric values can have `K`, `Ki`, `M`, `Mi`, `G`, `Gi`, `T` and `Ti`
suffixes.

The suffix `Mb` does not exist — only `M` (mega, 10^6) and `Mi` (mebi,
2^20 = 1,048,576) are valid.

## Fix

Replace `1Mb` with `1Mi` (1 mebibyte = 1,048,576 bytes), which is the
standard binary unit for memory/storage transfer measurements in
computing, and update the comment to reflect `MiB/s` instead of `MB/s`.

## Files Changed

- `docs/victoriametrics/vmctl/vmctl.md`: fixed the invalid literal `1Mb`
→ `1Mi` and updated the comment

---------

Signed-off-by: Maxime Grenu <maxime.grenu@gmail.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: Vadim Alekseev <vadimaleksv@gmail.com>
Co-authored-by: Yury Moladau <yurymolodov@gmail.com>
Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
(cherry picked from commit 40e27fc2c8)
2026-02-26 11:43:28 +01:00
hagen1778
8628ba4a1e deployment: include alert-statistics in default dashboards
Having this dashboard by default simplifies its maintenance.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit befbf9afca)
2026-02-26 11:43:28 +01:00
hagen1778
92f9eace09 dashboards: review alert-statistics dashboard
* add meaningful description, it is required for publishing on grafana.com
* remove dependency on `victoriametrics-metrics-datasource` as it is not used

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 65d0a8e129)
2026-02-26 11:43:28 +01:00
Hui Wang
bf0ad2edf2 metricsql: add function histogram_fraction()
This commit improves compatibility with promql by introducing a missing function `histogram_fraction`.
 
 histogram_fraction is a shortcut for `histogram_share(upperLe, buckets) - histogram_share(lowerLe, buckets)`

histogram_count, histogram_sum or histogram_avg will not be added to MetricsQL, as they only operate on Prometheus native histograms, which don't have _count and _sum series like the classic histogram or VictoriaMetrics histogram. For classic histogram, _count and _sum series can be used directly.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5346.
2026-02-26 09:43:06 +01:00
Aliaksandr Valialkin
90fdc312dd lib/httpserver: prefer gzip over zstd compression for http responses if the client indicates it supports both methods
This is needed because some clients and proxies improperly handle zstd-compressed responses.
See https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/455 .

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10535
2026-02-25 22:16:14 +01:00
f41gh7
dfea395d0a app/vmselect: prom handler: LabelValues: decode UTF8-encoded label name
This commit enhances UTF-8 decoding for `/label//values` API by making it compatible
with Prometheus labelName encoding.

 If the label is encoded according to the Prometheus UTF8 encoding scheme
(https://github.com/prometheus/proposals/blob/main/proposals/0028-utf8.md),
decode it before doing the search.

Every label value that starts with "U__" is considered to be
UTF8-encoded, according to the spec.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10446
2026-02-25 21:18:30 +01:00
Nikolay
0d66e816bd lib/promscrape: reduce CPU and memory usage for originalLabels
This commit optimizes the storage of originalLabels. Previously, they
were stored as a clone of the discovered labels, which required many
small allocations and added high pressure on the garbage collector.

Now originalLabels are stored as zstd-compressed JSON ([]byte). Since
they are rarely requested, the overhead of zstd decompression and
json.Unmarshal is negligible.

This optimization reduces memory usage for storing originalLabels by 3x
and CPU usage by 2x.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9952
2026-02-25 21:10:52 +01:00
Nikolay
65756111a0 lib/timerpool: remove misleading panic
After golang 1.23 it's safe to ignore timer.Reset True value.

According to the spec:

 For a chan-based timer created with NewTimer, as of Go 1.23,
 any receive from t.C after Reset has returned is guaranteed not
 to receive a time value corresponding to the previous timer
settings;

 If the program has not received from t.C already and the timer is
 running, Reset is guaranteed to return true.
 Before Go 1.23, the only safe way to use Reset was to call [Timer.Stop]
and explicitly drain the timer first.

 Golang 1.23 changed timer implementation from sync and async. And it
made it possible that chan send and timer.Stop could happen at the same
time.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9721
2026-02-25 21:10:52 +01:00
Max Kotliar
286845daf5 docs: add available_from for vmauth\jwt feature
Follow-up on
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10499
2026-02-25 15:25:02 +02:00
Max Kotliar
8bf2bdb366 app/vmauth: implement upstream request templating based on JWT vm_access claim
For proposal and implementation check out https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10492

address review comments

* simplify placeholder logic with pre-defined data structure
* add validation helper functions
* consolidate JWT placeholders parsing logic
* slightly reduce memory allocations for query templating
* do not allow templating for client request url params

Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-02-25 14:54:24 +02:00
hagen1778
1d856bb3ba docs: add change lines after 673b2ca7db
Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit d467faf739)
2026-02-25 11:28:14 +01:00
sias32
0a3192bc3c dashboards/deployment: add links for vmalert (#10509)
### Describe Your Changes

1. Dashboard: Adding a link to an alert for quick access to it
(alert-statistics)
2. Rules: Replace localhost with $externalURL to take the address from
the --external.url flag

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: sias32 <sias.32@yandex.ru>
(cherry picked from commit 673b2ca7db)
2026-02-25 11:28:14 +01:00
hagen1778
79a0815d93 app/vmalert: fix typo Minium => Minimum
Follow-up after a6200cc83d

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 40ccf0c333)
2026-02-25 09:28:24 +01:00
hklhai
5de17bc530 Improve Influx parsing error message when raw newline (\n) appears inside quoted fieldvmagent: Improve Influx parsing error message when raw newline (\n)… (#10524)
# Investigation & Root Cause --- InfluxDB Line Protocol Parsing with Raw
Newline (`\n`)

This document describes the investigation process and root cause
analysis for Influx Line Protocol parsing errors in VictoriaMetrics when
a **raw newline (`\n`) byte appears inside a quoted field value**.

------------------------------------------------------------------------

## Background

According to the Influx Line Protocol specification:

-   Each point must be represented as a single line.
-   The newline character (`\n`) separates points.
-   Literal newline bytes are not allowed inside quoted field values.

Therefore, any raw newline byte (`0x0A`) inside a quoted string makes
the line invalid.

------------------------------------------------------------------------

## Related Issue

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10067

------------------------------------------------------------------------

## Expected Behavior

VictoriaMetrics should reject Influx Line Protocol lines that contain a
raw newline inside a quoted field value, since this violates the
protocol specification.

The parsing failure itself is correct.

------------------------------------------------------------------------

## Actual Behavior

VictoriaMetrics rejects the line with the following error:

cannot parse field value for "...": missing closing quote for quoted
field value

While technically correct, the error message does not clearly indicate
that the root cause is a raw newline inside the quoted field value.

------------------------------------------------------------------------

## Minimal Reproducer

The issue can be reproduced without Telegraf or Jolokia:

``` bash
printf 'test value="hello
world"\n' | curl -X POST http://localhost:8428/write --data-binary @-
```

This produces:

cannot parse field value for "value": missing closing quote for quoted
field value

The failure occurs because the value contains an actual newline byte
(0x0A), not the escaped sequence `\n`.

------------------------------------------------------------------------

## Environment Setup

The issue was reproduced using the following stack:

-   VictoriaMetrics v1.127.0
-   InfluxDB 1.8
-   Spring Boot + Jolokia
-   Telegraf 1.36.2

Telegraf collects JVM `SystemProperties`, including:

``` json
"line.separator": "\n"
```

After JSON unmarshalling, this becomes a real newline byte in memory.

Detailed reproduction steps can be found here:

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10067#issuecomment-3896175100

------------------------------------------------------------------------

## Observed Serialized Line

Using breakpoint debugging in:

    lib/bytesutil/bytebuffer.go:58

The `ReadFrom` function reads and assembles an Influx line containing:

    SystemProperties.line.separator="
    ",

The quoted field contains an actual newline byte before the closing
quote.

This breaks the single-line assumption of Influx Line Protocol.

VictoriaMetrics splits on `\n`, resulting in:

-   A truncated first line
-   A missing closing quote
-   Parsing failure

------------------------------------------------------------------------

## Important Clarification

This issue is **not** caused by the escaped sequence `"\\n"`.

The failure occurs only when the serialized Influx line contains an
actual newline byte (`0x0A`) inside the quoted value.

Escaped `\n` (two characters: `\` and `n`) is valid.

------------------------------------------------------------------------

## Root Cause

-   Telegraf serializes a field containing a real newline byte.
-   Influx Line Protocol forbids literal newline characters inside
    quoted fields.
-   VictoriaMetrics correctly treats `\n` as a line separator.
-   The parser then encounters an incomplete quoted field and reports
    "missing closing quote".

The parsing behavior is correct per specification.

------------------------------------------------------------------------

## Proposed Improvement

The parsing logic should remain unchanged.

However, the error message can be improved to better indicate the root
cause.

Suggested error message:

invalid Influx line protocol: missing closing quote for quoted field
value;
this may be caused by a raw newline (`\n`) inside the quoted field value

This makes the failure immediately actionable and easier to diagnose.

------------------------------------------------------------------------

## Summary

-   The failure is caused by a raw newline byte inside a quoted field
    value.
-   This violates the Influx Line Protocol specification.
-   VictoriaMetrics correctly rejects the line.
-   The error message should explicitly mention the possibility of a raw
    newline (`\n`) inside the quoted field.

Signed-off-by: hklhai <hkhai@outlook.com>
Co-authored-by: Max Kotliar <kotlyar.maksim@gmail.com>
2026-02-24 20:43:14 +02:00
Max Kotliar
2e6f40226b app/vmstorage: increase min free disk space from 10M to 100M (#10529)
### Describe Your Changes

The free disk space check is not continuous but occurs periodically. In
high-load environments with large ingestion rates, the system can exceed
the remaining 10MB between checks. This can lead to a situation where
disk space is exhausted before the next check occurs, causing panic.

Increase the default value 10x to cover the case.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9561

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-24 18:07:47 +02:00
Roman Khavronenko
d2a033453e app/vmselect: properly apply extra filters for tenant tokens for /api/v1/label/../values (#10503)
Previously, extra filters were ignored for
`/api/v1/label/vm_account_id/values` or
`/api/v1/label/vm_project_id/values` calls. In result, even if user's
visibility was limited by applying
`?extra_filters[]={vm_account_id="1"}` param they could get the list of
all available tenants in the system.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-02-24 15:39:28 +01:00
hagen1778
ccfd0d17ad app/vmalert: rename MiniMum => Minimum
Follow-up after a5811d3c3b

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit a6200cc83d)
2026-02-24 15:37:43 +01:00
Fedor Kanin
bedeb1aa08 docs/vmalert: fix a typo by replacing maxiMum with maximum (#10516)
### Describe Your Changes

Fix a typo by replacing `maxiMum` with `maximum` in Markdown docs and
CLI flags help.

Resolve #10515

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

(cherry picked from commit a5811d3c3b)
2026-02-24 15:37:43 +01:00
JAYICE
6a22fd828a document: enrich the description of buckets_limit (#10465)
### Describe Your Changes

fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10417

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

(cherry picked from commit 5962b47c31)
2026-02-24 15:37:42 +01:00
Roman Khavronenko
b7f98a503a docs: re-visit Troubleshooting docs (#10512)
* remove ToC in the beginning, as it duplicates right-bar functionality
and is easier to make a mistake with. For example, it didn't have the
ZFS section in it
* simplify wording where it was possible
* reference new tools VM got in recent releases
* re-prioritize tips order based on personal experience

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
(cherry picked from commit 9a4edc738a)
2026-02-24 15:37:42 +01:00
Roman Khavronenko
9027da630d dashboards: filter out zero value for Major page faults panel (#10517)
Components like vmselect and vminsert rarely touch disk, so most of the
time their values are 0. Filtering out 0 values makes the panel cleaner.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 30d01e9cae)
2026-02-24 15:37:42 +01:00
Artem Fetishev
8a2f1e3a21 lib/uint64set: move set un/marshal methods from Storage to uint64set (#10521)
A refactoring that moves the uint64set.Set marshaling and unmarshaling from lib/storage/storage.go to lib/uint64set. Also added function docs and tests.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-24 11:29:10 +01:00
Zhu Jiekun
8813fcdaa4 flaky test: disable GC during sync.Pool test (#10523)
Disable GC when testing sync.Pool `Get` and `Put` logic, so the items in pool won't be recycled too fast.

Follow-up for 785daff65d.
2026-02-24 10:35:56 +01:00
Fred Navruzov
3351bd8186 docs/vmanomaly - strip bad chars from filenames (#10525)
### Describe Your Changes

Strip spaces and `=` from filenames as suggested in #10522 

now
```shellhelp
find ./docs |egrep '[ =]'
```
returns no such files

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-24 10:06:12 +02:00
Max Kotliar
0fe604d5d7 .github: Run apptests on separate pool of runners
It should prevent apptest timeouts due to runner saturation. When
apptests are run with other tests and linters they do not have enough
CPU to complete in time and often time out.

If one re-runs the apptests shortly after they are likely to pass
because the same runner has enough resources available (other job
finished).

Remove GOGC=10 as the runner has enough memory (16Gb)  to run apptests.

I did some tests and observed a drop in overall test duration from 4.5m to
3.30-3m.
2026-02-23 14:17:56 +02:00
Vadim Rutkovsky
5cb6b101f1 dashboards: operator dashboard should extract version from metrics (#10502)
### Describe Your Changes

Use vm_app_version to determine operator version instead of static text

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: Vadim Rutkovsky <vadim@vrutkovs.eu>
2026-02-23 13:32:32 +02:00
Roman Khavronenko
4b39db71c9 docs: add dedicated opentelemetry section to docs (#10491)
The new section is supposed to contain otel related information for all
products, like VT, VM, VL.

It is also supposed to be visible for readers right away, without need to
dig for info in each product.

It contains basic information and is supposed to act as a router to more
detailed info in each product.

While there, also updated VM-related otel info.

---------

Depends on
https://github.com/VictoriaMetrics/victoriametrics-datasource/pull/458

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 4d06e34b66)
2026-02-23 10:24:44 +01:00
Aliaksandr Valialkin
cb320c8f50 vendor: update github.com/valyala/fastjson from v1.6.9 to v1.6.10
This fixes the issue mentioned at https://github.com/VictoriaMetrics/VictoriaLogs/issues/1042#issuecomment-3936084518
2026-02-21 13:21:21 +01:00
Pablo (Tomas) Fernandez
d60e1291a3 Docs: Update guide "Getting started with VM Operator" (#10429)
### Describe Your Changes

- Add an introduction with a brief explanation of the operator and its
benefits as an intro
- Make some steps more explicit, instead of just linking to the VM
cluster guide
- Separate config/chart values files from kubectl apply (instead of
using heredoc and in-line yaml)
- Update screenshots and add figcaptions where needed
- Update Kubernetes and tools versions to newer releases
- Remove revision numbers from the Grafana config to install the latest
revision
- Added a section to configure scraping of Kubernetes resources (nodes,
pods, etc.)
- Tested updated instructions on GKE 1.33 and 1.34 (and a local k3s
instance) successfully
- Added and updated expected outputs. Some were missing and others were
outdated
- Updated Grafana dashboards screenshots since they changed from the
last revision
- Minor corrections and typo fixes. Improved flow
- Added a section at the end pointing readers to where they can go next.

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-20 22:39:03 +02:00
Pablo (Tomas) Fernandez
796a796e5a Docs: update guide "Headlamp Kubernetes UI and VictoriaMetrics" (#10462)
### Describe Your Changes

- Updated introduction
- Added proper steps
- Tested instructions on headlamp desktop version and the in-cluster web
ui
- Added images to guide user
- Mentioned that the test connection button does not work (it probes a
`-healthy` endpoint that is not supported by VM). The plugin still
works, it's just the test button that fails
- Added links to the single and cluster installation guides

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-20 22:39:03 +02:00
Pablo (Tomas) Fernandez
323306454b Docs: Update Guide "How to delete or replace metrics in VictoriaMetrics" (#10500)
### Describe Your Changes

- Rewrote the introduction
- Added list of endpoints for single node, cluster, and cloud
- Added tips for working with VictoriaMetrics running on Kubernetes
- Fleshed out explanations for each step
- Added reference links for all required endpoints
- Tested every command

### Checklist

The following checks are **mandatory**:

- [X] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [X] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-20 22:39:02 +02:00
Max Kotliar
27bcf67f7e docs/changelog: add regexp example to bugfix description 2026-02-20 16:28:17 +02:00
Max Kotliar
32cb85de91 docs: tiny corrections 2026-02-20 16:22:06 +02:00
Max Kotliar
607665e365 docs/changelog: chore changelog 2026-02-20 13:23:24 +02:00
Nikolay
6cda714a82 lib/storage: properly search tenants
Commit 610b328e5a introduced a bug in the
date range search logic. If the first searched date for a given tenant
did not match, the search could proceed incorrectly.

This commit fixes the SearchTenants API by correctly advancing the date
passed to table.Seek.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10422
2026-02-20 12:01:11 +01:00
Roman Khavronenko
def0829dc0 docs: clarify details on dump_request_on_errors
* add example of the produced log, so users could understand the impact;
* stress once again about sensitive data exposure when
dump_request_on_errors is enabled.
2026-02-20 11:56:48 +01:00
Roman Khavronenko
416c949cfc app/vmauth: clarify the error message for all failed backends
This change adds some context to the error when all backends failed. From
support cases it seems like without the context users might not know
what to do with this error message. Clarification advises them to check
the prev error messages.
2026-02-20 11:56:48 +01:00
Yury Moladau
b3a8257886 app/vmui: fix label escaping for cardinality and autocomplete (#10498)
This PR fixes handling of label names containing special characters
(e.g. `.`, `/`, `-`).

Changes:
- Fixed escaping logic for cardinality requests.
- Fixed autocomplete insertion to escape label names in query selectors.

Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10485
2026-02-20 11:56:48 +01:00
Vadim Alekseev
65d6a4e303 lib/regexutil: prevent panic error parsing regexp: expression nests too deeply
Previously regex simplify function made an attempt to parse string representation of simplified regex.
And it could produce runtime panic due to std lib specification:

```
// Simplify returns a regexp equivalent to re but without counted repetitions
// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
// The resulting regexp will execute correctly but its string representation
// will not produce the same parse tree, because capturing parentheses
// may have been duplicated or removed.
```
 
This commit ignores the simplified regex parsing error and returns the original regex.
As a result, some niche regex patterns may not be simplified.
But these are extremely rare cases that are seldom seen in production, so the tradeoff is acceptable.

Fixes victoriaMetrics/victoriaLogs/issues/1112
2026-02-20 11:56:47 +01:00
f41gh7
1ad947a5d4 apptest: follow-up for 8a80538357
Properly account metadata ingestion records. Previously test didn't
account metadata records at all. Because it relied on lib/promscrape
code flag value, while binary obtained flag value as an argument.
2026-02-20 10:55:08 +01:00
Max Kotliar
b4b971acd5 docs: make docs-update-flags should rely on git tag (#10490)
### Describe Your Changes

As requested by @valyala changing the behavior of `make
docs-update-flags` from relying on git worktree, specific git remotes to
the git tags. Same way as `make publish-release` works.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-19 18:52:03 +02:00
Max Kotliar
8a80538357 lib/prommetadata: enable metrics metadata ingestion and storing by default (#10489)
### Describe Your Changes

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2974

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-19 18:48:20 +02:00
Aliaksandr Valialkin
0b2c4da8c9 all: run go fix -reflecttypefor 2026-02-19 14:05:14 +01:00
Aliaksandr Valialkin
574f697009 vendor: update github.com/valyala/fastjson from v1.6.8 to v1.6.9
This should help reducing memory usage at https://github.com/VictoriaMetrics/VictoriaLogs/issues/1042
2026-02-19 13:29:13 +01:00
Benjamin Nichols-Farquhar
3762a7b03e lib/backup implement cross-type backup copies
While server side copies when using the same backup origin and
destination are always most efficient there are times when moving
between backup locations is required.

Right now vmbackup throws an error in these cases. 

While it's true that a user could always do a fresh backup from a
snapshot rather than copy an old backup, this requires access to storage
data locations and a running vmstorage instance, something that is not
_generally_ required for otherwise moving backups around in remote
locations using vmbackup.

This is a small change that makes the moving of backups from one
location to another transparent to users, without having to consider if
those locations are the same or different. This both simplifies backup
migrations and unlocks using vmbackup for more complex operations.

Specifically this came up in my use case because we want to orchestrate
the down-scaling of EBS volumes backing our vmstorage cluster, which
requires some complex backup operations, one of which being taking a
backup from s3 to a local filesystem.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10401
2026-02-18 21:47:09 +01:00
Aliaksandr Valialkin
f31dae5fff lib/httpserver: escape the error string before sending it in the response to the client
See https://github.com/VictoriaMetrics/VictoriaMetrics/security/code-scanning/353
2026-02-18 20:40:18 +01:00
Aliaksandr Valialkin
91f20d8247 vendor: update github.com/VictoriaMetrics/VictoriaLogs from v0.0.0-20260125191521-bc89d84cd61d to v0.0.0-20260218111324-95b48d57d032 2026-02-18 20:40:13 +01:00
Aliaksandr Valialkin
e8a473fe3a lib/timeutil: avoid losing the precision at decimalExp when converting it from int64 to int
This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/security/code-scanning/354
2026-02-18 20:25:09 +01:00
Aliaksandr Valialkin
379dcc6aa8 vendor: run make vendor-update 2026-02-18 19:48:26 +01:00
Aliaksandr Valialkin
a2657e2f8f all: run go fix -omitzero 2026-02-18 19:37:17 +01:00
Aliaksandr Valialkin
99cb28ff7a all: run go fix -minmax 2026-02-18 19:20:09 +01:00
Aliaksandr Valialkin
74e2bcd772 all: run go fix -newexpr 2026-02-18 19:07:24 +01:00
Aliaksandr Valialkin
7e7e59f57d all: run go fix -fmtappendf 2026-02-18 19:02:36 +01:00
Aliaksandr Valialkin
bd381663a0 all: run go fix -mapsloop 2026-02-18 19:02:36 +01:00
Aliaksandr Valialkin
f5db835039 app/{vminsert,vmselect}/main.go: reduce the difference between enterprise and public code a bit
See db46a6a322

This is a follow-up for the commit 2771d67661
2026-02-18 19:02:35 +01:00
Aliaksandr Valialkin
b308d666c0 all: run go fix -slicescontains 2026-02-18 19:02:35 +01:00
Artem Fetishev
6b428c5835 lib/storage: shard dateMetricIDCache (#10486)
Use the same sharded implementation as in metricIDCache. The change is
basically a copy-paste. The only difference is that the rotation period
remains `1h` instead of `1m` in order not to break the fix for #10064.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-18 18:19:24 +01:00
Aliaksandr Valialkin
8a9504c094 all: run go fix -slicessort 2026-02-18 15:01:28 +01:00
Aliaksandr Valialkin
f0550b387d all: run go fix -any 2026-02-18 14:58:15 +01:00
Aliaksandr Valialkin
13f2bf6f44 lib/protoparser/protoparserutil: read request body to chunked buffer instead of contiguous byte slice
This should reduce memory reallocations and fragmentation when reading large request bodies from slow clients.
This also should reduce memory usage a bit because of the reduced memory fragmentation.

Updates https://github.com/VictoriaMetrics/VictoriaLogs/issues/1042
2026-02-18 14:51:24 +01:00
Aliaksandr Valialkin
43a7e4e5de vendor: update github.com/VictoriaMetrics/fastcache from v1.13.2 to v1.13.3 2026-02-18 14:30:34 +01:00
Aliaksandr Valialkin
460359b297 vendor: update github.com/valyala/fastjson from v1.6.7 to v1.6.8 2026-02-18 14:30:34 +01:00
Aliaksandr Valialkin
58b29fb52c go.mod: update github.com/VictoriaMetrics/metrics from v1.41.1 to v1.41.2, and github.com/VictoriaMetrics/metricsql from v0.84.10 to v0.85.0 2026-02-18 14:30:34 +01:00
Aliaksandr Valialkin
de397c212e app/vminsert: run go fix -rangeint 2026-02-18 14:30:33 +01:00
Aliaksandr Valialkin
fb6133ac1d app/vmauth: consistently use for i := range N instead of for i := 0; i < N; i++ 2026-02-18 14:30:33 +01:00
Aliaksandr Valialkin
57bd95119b app/vmctl: run go fix -rangeint 2026-02-18 14:30:32 +01:00
Aliaksandr Valialkin
071d3670d4 lib: run go fix -rangeint 2026-02-18 14:30:32 +01:00
Aliaksandr Valialkin
4fb3dd651a lib/persistentqueue: run go fix -rangeint 2026-02-18 14:30:31 +01:00
Aliaksandr Valialkin
d773ee04ee lib/streamaggr: run go fix -rangeint 2026-02-18 14:30:31 +01:00
Aliaksandr Valialkin
1bbb4843ff lib/promscrape: run go fix -rangeint 2026-02-18 14:30:30 +01:00
Aliaksandr Valialkin
d5f0b0a97a lib/encoding: run go fix -rangeint 2026-02-18 14:30:30 +01:00
Aliaksandr Valialkin
f5d87045a0 lib/mergeset: run go fix -rangeint 2026-02-18 14:30:30 +01:00
Aliaksandr Valialkin
7baef19a43 lib/storage: run go fix -rangeint 2026-02-18 14:30:29 +01:00
Aliaksandr Valialkin
84cf09d4e5 apptest: run go fix -rangeint 2026-02-18 14:30:29 +01:00
Aliaksandr Valialkin
ae701ad0c5 app/vmselect: run go fix -rangeint 2026-02-18 14:30:28 +01:00
Aliaksandr Valialkin
932dc6c1ea app/vmauth: run go fix -rangeint 2026-02-18 14:30:28 +01:00
Aliaksandr Valialkin
d522496dd7 app/vmalert: run go fix -rangeint 2026-02-18 14:30:27 +01:00
Aliaksandr Valialkin
422f4060e8 app/vmagent: run go fix -rangeint 2026-02-18 14:30:27 +01:00
Max Kotliar
1415917014 dashboards/vmauth: Add Client request buffering latency panel (#10412)
### Describe Your Changes

In https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10310 ability
to [buffer request
body](https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering)
was added to `vmauth`. This PR adds a new panel `Request body buffering
latency` to `vmauth` dashboard.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10309

<img width="1504" height="680" alt="Screenshot 2026-02-07 at 00 28 46"
src="https://github.com/user-attachments/assets/ba98b06f-de2c-4d4c-96bb-e5c20049cebc"
/>

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: Hui Wang <haley@victoriametrics.com>
2026-02-18 15:27:13 +02:00
Max Kotliar
6a233f20ff package/release: Add github-verify-release job (#10476)
### Describe Your Changes

The job ensures that:
- the draft release with given `$(TAG)` exists
- the release has expected `$(GITHUB_ASSETS_COUNT)` number of uploaded
assets
- All the assets were uploaded successfully.

It also adds helper job `github-get-release` which finds a draft release
by `$(TAG)` and stores into file `/tmp/vm-github-release-$(TAG)` file.

The `github-delete-release` job is decoupled from the file produced by
`github-create-release` job. So it could be run at any time from any
machine.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-18 15:06:20 +02:00
Artem Fetishev
c09350e0f5 lib/storage: metricIDCache cache follow-up for e5c8581bad (#10468) (#10479)
This is a follow-up PR for e5c8581bad (#10468):

- Extract the bucket size into a constant and document it
- Make benchmark constant metricIDCache-specific
- Add the same benchmark for dateMetricIDCache to compare it with metricIDCache.  See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10479 for benchmark results.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-17 16:13:21 +01:00
Max Kotliar
261853a206 go.mod: update metrics module (#10470)
### Describe Your Changes

VictoriaMetrics binaries will now expose some process-level metrics when
run on macOS.

See:
- https://github.com/VictoriaMetrics/metrics/issues/75
- https://github.com/VictoriaMetrics/metrics/pull/107

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-16 19:52:38 +02:00
Max Kotliar
d7d823f7d7 docs/changelog: correctly place feature into tip section 2026-02-16 19:43:25 +02:00
Max Kotliar
d1c8bfcc52 app/vmauth: authenticate by jwt token (#10435)
### Describe Your Changes

Adds JWT authentication support to vmauth with signature verification
and tenant-based access control. For now, public_keys have to be set
explicitly in the config, OIDC discovery will be added in upcoming PRs.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10445

Key Features

- JWT Configuration: Added `jwt_token` field to user config supporting
RSA/ECDSA public keys or skip_verify mode (for testing purposes).
- Token Validation: Verifies JWT signatures, checks expiration, and
extracts vm_access claims
- Compatible with vmgateway: jwt tokens issued for vmgateway should work
with vmauth too.

Examples

```yaml
users:
- jwt_token:
    public_keys:
    - |
      -----BEGIN PUBLIC KEY-----
      MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA...
      -----END PUBLIC KEY-----
  url_prefix: "http://victoria-metrics:8428/"
```

```yaml
users:
- jwt_token:
    skip_verify: true
  url_prefix: "http://victoria-metrics:8428/"
```


Constraints

- JWT tokens cannot be mixed with other auth methods (bearer_token,
username, password)
- Requires at least one public key OR skip_verify=true
- Limited to single JWT user (multiple JWT users will be supported in
the future)

Next steps
- Multiple `jwt_token` support. 
- Claim matching
- Claim based routing
- OIDC\JWKS support

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Co-authored-by: Pablo (Tomas) Fernandez <46322567+TomFern@users.noreply.github.com>
2026-02-16 19:43:25 +02:00
Max Kotliar
5bbf15f47e docs: start v1.136 lts line 2026-02-16 19:19:18 +02:00
Max Kotliar
856ebe6774 docs: bump version to v1.136.0 2026-02-16 17:43:46 +02:00
Max Kotliar
ac404c4f8e deployment/docker: bump version to v1.136.0
Signed-off-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-16 17:43:46 +02:00
Max Kotliar
3fde20112a docs/changelog: update changelog with LTS release notes 2026-02-16 17:31:39 +02:00
Max Kotliar
bf188db618 deployment/docker: Fix publish final fips images from rc 2026-02-16 14:19:22 +02:00
Yury Moladau
91620eaed5 app/vmui: bump package versions (#10291)
Updated project dependencies to the latest versions.

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
2026-02-14 20:11:23 +02:00
Max Kotliar
5572cecf0d docs/changelog: cut v1.136.0 2026-02-13 19:58:34 +02:00
Max Kotliar
61a3a6ffce app/vmselect: run make vmui-update 2026-02-13 19:45:18 +02:00
Artem Fetishev
b3b6ad3231 lib/storage: optimize metricIDCache sharding (#10468)
Exploit uint64set data structure peculiarities (adjacent elements are
stored in
64KiB buckets) to optimize metricIDCache memory footprint.

As the result the cache utilizes 87% less memory and is up to 90%
faster. See
[benchstat.txt](https://github.com/user-attachments/files/25294076/benchstat.txt).

Follow-up for #10388 and #10346.

Thanks to @valyala for the optimization idea.

---------

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-13 18:31:13 +02:00
Nikolay
581657afb7 lib/storage: properly report metrics for the last partition
Previously, on the last day of a month, storage could report empty
metrics for the last partition. This could happen if a new empty
partition was created in updateNextDayMetricIDs or if time series with
future timestamps were ingested.

This commit adds a check to ensure the last partition belongs to the
current month. Since this is typically the most actively used partition,
it should be treated as the last one.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10387
2026-02-13 11:25:08 +01:00
Max Kotliar
1dd2ac6373 docs/changelog: chore tip before release 2026-02-13 10:33:31 +02:00
f41gh7
7cc4dc440a go.mod: update metricsql
It contains fix for https://github.com/VictoriaMetrics/metricsql/issues/60

Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-02-12 23:50:46 +01:00
Artem Fetishev
c70a2557e6 Makefile: rename integration-test to apptest (#10461)
Follow-up for 73015bccb9

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-12 19:09:03 +01:00
Roman Khavronenko
e5caf30583 docs: simplify wording in the top section (#10451)
The purpose of the change is to make better first impression for readers
by removing all unnecessary verbosity. As with status pages, try to
increase the density of useful information.

The initial idea was borrowed from @func25

---------------

<img width="961" height="649" alt="image"
src="https://github.com/user-attachments/assets/2a91ded5-17cf-49ad-a589-45b634af991a"
/>

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Signed-off-by: Roman Khavronenko <hagen1778@gmail.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-12 19:28:16 +02:00
Max Kotliar
b3f3f5b192 docs: mention top query by memory usage
Follow up on
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10391
2026-02-12 17:54:49 +02:00
Stephan Burns
2a4d4044c5 Add restarts annotation to remaining dashboards (#10439)
### Describe Your Changes

Added annotation to show restarts.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: Stephan Burns <34520077+Sleuth56@users.noreply.github.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
2026-02-12 16:40:33 +02:00
Aliaksandr Valialkin
65f3a1ca52 dashboards/vm/vmauth.json: run make dashboards-sync after the commit 9774fe8df1 according to dashboards/README.md
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10437
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10438
2026-02-12 14:24:50 +01:00
Mathias Palmersheim
aecc2ec9b2 Change user count query so it accounts for multiple replicas of vmauth (#10438)
### Describe Your Changes

Fixes issue where multiple replicas of vmauth cause the user count to be
inflated for vmauth see #10437

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-12 14:24:49 +01:00
Artem Fetishev
34d5f21669 Makefile: make vet and golangci-lint to also check synctests
Follow-up for 3d6f353430

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-12 13:21:52 +01:00
Zhu Jiekun
aa6882d9ba vminsert: proper reset labelsBuf for OpenTelemetry ingestion to avoid high memory usage
Ensure proper expansion and reset of `buf` size for OpenTelemetry
ingestion. This pull request does:
1. Flush data in `wctx` when `buf` is over 4MiB.
2. Do not return `wctx` with `buf` larger than 4MiB while the actual
in-use length is less than 1MiB to the pool.

Previously, when a small number of requests carried a large volume of
time series or labels, `buf` was over-expanded and recycled to the pool,
resulting in an excessive memory usage issue.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10378
2026-02-12 12:49:49 +01:00
Roman Khavronenko
2350dfdbb8 docs: fix the broken image for single-node (#10460)
See
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10449#issuecomment-3890326179

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-02-12 12:49:49 +01:00
Pablo (Tomas) Fernandez
9c92484ac8 Docs: Update guide "How to use OpenTelemetry with VictoriaMetrics and VictoriaLogs" (#10396)
This is part of the effort to upgrade and validate the [Guides in the
docs](https://docs.victoriametrics.com/guides/).

Doc page:
https://docs.victoriametrics.com/guides/getting-started-with-opentelemetry/

Functionally, nothing should change. Aside from the fix for the error that prevented
one of the example applications from running, the rest of the commands in the
guide should be equivalent to the original.

Header anchor links do not change with this update. I added a few
headers but the existing headers anchors should remain unchanged to
prevent breaking existing links.

- Tested on a more modern version of GKE to validate it still works OK
(1.34.1-gke.3971001)
- Changed wording of some sections to improve flow and readability
- Added some missing steps/troubleshooting
- Add tips annotations for cardinality explorer and setup references to
make them stand apart from the main content
- Use `kubectl port-forward svc/...` instead of `kubectl port-forward
pod` (service selectors vs pod names) in some test commands to make
instructions simpler
- Updated OpenTelemetry version to fix error that prevented
`app.go-collector.example` sample code from running
- Replaced the "Visit these links" part in the second program (with the
fast/slow endpoints) with curl commands
- Updated the first VMUI test link to show table instead of graph while
testing OpenTelemetry ingestion (default graph view can be confusing as
the metric value for `k8s_container_ready` doesn't really show any
values)
- Minor typos, grammar check, and consistency (Kubernetes vs kubernetes,
Helm vs helm, Collector vs collector, etc)
2026-02-12 12:49:48 +01:00
Aliaksandr Valialkin
c8ec70b00a .github/workflows/test.yml: use Go version in the cache key for golangci-lint
This should fix issues like in the https://github.com/VictoriaMetrics/VictoriaMetrics/actions/runs/21943547755/job/63375204688 :

    package requires newer Go version go1.26 (application built with go1.25)
2026-02-12 12:21:55 +01:00
Roman Khavronenko
a3edbf3721 lib/storage: use child trace during index searches
This change only affects query trace. It correctly uses the branched
query trace in callback function, so in trace it is placed in the right
actions branch.

Bug was introduced in
c705da74f6
2026-02-12 12:21:38 +01:00
JAYICE
4b4f031479 document: add description about time-based kafka commit
fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10420
2026-02-12 12:09:38 +01:00
Roman Khavronenko
2dda0d0b98 docs: mention Perses on integrations list (#10442)
While there, attempted to simplify wording in perses doc.
2026-02-12 12:09:38 +01:00
Roman Khavronenko
70fa1798bf docs: add diagrams for single and cluster components (#10449)
This PR adds diagram for single-node and updates diagram for cluster
version. Both diagram go with excalidraw source attached, so they can be
updated in future.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10398
2026-02-12 12:09:38 +01:00
Roman Khavronenko
90482d76a0 docs: excalidraw vmagent diagram
Source vmagent diagram to excalidraw, so it can be easily updated in
future.

-----------------

<img width="936" height="671" alt="image"
src="https://github.com/user-attachments/assets/1dfc9cb5-0323-4e0d-881c-3c76ccda578f"
/>

<img width="922" height="706" alt="image"
src="https://github.com/user-attachments/assets/42297ede-5986-451c-83fc-c11dba9560e3"
/>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2026-02-12 12:09:38 +01:00
Phuong Le
77ce5229fa ci: scope Go artifact cache restore fallback by Go version
Fixes
https://github.com/VictoriaMetrics/VictoriaMetrics/actions/runs/21921172620/job/63301435721
2026-02-12 12:09:38 +01:00
Zhu Jiekun
67d864d8dd docs: mentioning VictoriaTraces in vmalert's doc (#10457) 2026-02-12 12:09:37 +01:00
Aliaksandr Valialkin
24c95c2393 lib/promscrape: follow-up for the commit 22696f378c
- Return back the check that the size of the scraped response doesn't exceed the maxScrapeSize
  at the client.ReadData(). Without this check the scraped response may be truncated to maxScrapeSize+1
  bytes, which can result in decompression error. The decompression error in this case
  hides the original error about too big response size. This complicates troubleshooting by users.

- Stop decompressing the scraped response as soon as the decompressed response size exceeds maxScrapeSize.
  This protects from excess memory usage needed for holding the decompressed response with sizes exceeding
  the maxScrapeSize.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10320
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9481
2026-02-12 11:49:08 +01:00
Aliaksandr Valialkin
1b612bd4b2 vendor: run make vendor-update 2026-02-11 17:54:35 +01:00
Aliaksandr Valialkin
e58ccaecdb deployment/docker/Makefile: update Go builder from Go1.25.7 to Go1.26.0
See https://go.dev/doc/go1.26
2026-02-11 17:37:58 +01:00
Vadim Alekseev
febcd00e64 .github/workflows: use Go version from go.mod (#1092) 2026-02-11 16:12:03 +01:00
Artem Fetishev
da2f211de9 lib/storage: use workingsetcache for tfss loops cache again (#10427)
lrucache causes huge cpu usage in some caches. See #10297.

There was a hypothesis that this was due to too short ttl in lrucache.
Setting it to 1h (the default workingsetcache eviction period) did not
completely eliminate the problem. The CPU utilization was not huge but still high.
See #10416.

Thus reverting back to fix such deployments. This solution is temporary
because the cache consumes at least 32MB. There is one instance per
indexDB which means that if the retention is 3y then the total memory
utilized by this cache will be over 1GB and most of it will be unused.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-11 15:24:07 +01:00
Max Kotliar
2f0ad23b01 .github: pin go version to 1.25 to fix CI (#10448)
Go1.26 has been recently released and was picked up by CI actions.

The tests and linter actions start to fail with:

GOEXPERIMENT=synctest go vet ./lib/...
go: unknown GOEXPERIMENT synctest

This happens because Go 1.26 removes the synctest experiment.

Changelog:
This package was first available in Go 1.24 under GOEXPERIMENT=synctest,
with a slightly different API. The experiment has now graduated to
general availability. The old API is still present if
GOEXPERIMENT=synctest is set, but will be removed in Go 1.26.

https://go.dev/doc/go1.25#library

### Describe Your Changes

Please provide a brief description of the changes you made. Be as
specific as possible to help others understand the purpose and impact of
your modifications.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-11 15:43:34 +02:00
Max Kotliar
7f2efdd12c docs/changelog: cleanup after merge 2026-02-11 15:23:03 +02:00
Yury Moladau
eb14f1f3c1 app/vmui: add label autocomplete context-aware by applying existing label matchers (#10399)
* Add context-aware label autocomplete by applying existing label
matchers (e.g. namespace/job) when fetching labels and label values.
* Update `package.json` dependencies.
* Update `vite.config.ts` to ensure correct API requests in playground
mode (`start:playground`).

Related issue: #9269

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
2026-02-11 15:22:48 +02:00
JAYICE
a3792a3565 vmui: add Queries with most memory to execute section in Top Queries page (#10391)
### Describe Your Changes

fix  https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9330

<img width="5088" height="1674" alt="image"
src="https://github.com/user-attachments/assets/4364cfae-8c56-417d-9d1c-6a219fa8802c"
/>


### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: JAYICE <1185430411@qq.com>
2026-02-11 14:54:46 +02:00
Hui Wang
8b9d960254 docs: remove incorrect description on -search.logSlowQueryStats (#10447)
>Query statistics logging is enabled by default {{% available_from
"v1.129.0" %}} with a threshold of 5s.
2026-02-11 14:49:48 +02:00
Fred Navruzov
6abeb29475 docs/vmanomaly: fix-non-canonical-url-reader-docs (#10444)
### Describe Your Changes

fix non-canonical link to MetricsQL

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-11 13:06:42 +02:00
Max Kotliar
44cf691dc1 docs: use canonical link if life of sample diagram 2026-02-11 12:52:04 +02:00
Max Kotliar
6dbeeb4e64 lib/jwt: address code review comments (#10428)
### Describe Your Changes

Addressing code review comments from
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10426, kept them
separate to isolate copy-paste change from follow up changes

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-10 18:57:40 +02:00
Max Kotliar
957291a705 lib/jwt: opensource jwt library (#10426)
### Describe Your Changes

It was
[decided](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9439#issuecomment-3612299461)
that OIDC authentication in vmauth will be part of open source repo.

That requires opensourcing lib/jwt. PR does not contain any changes in
logic, just copy-paste from enterprise repository.

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9439

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-10 18:51:17 +02:00
Aliaksandr Valialkin
c62f1becf7 lib/backup/actions: properly validate the size for the last part during the restoring from backup
This issue has been found by https://www.cubic.dev/codebase-scan/7b15eebd-abc2-4604-9523-7f9bec5f67f6?violationId=324521b6-50fb-502d-8981-980bd9fd44ab
2026-02-10 15:17:57 +01:00
Aliaksandr Valialkin
62128a686c lib/protoparser/protoparserutil: limit the maximum size of the snappy-encoded data block, which can be read from the remote client
This is a follow-up for the commit 51b44afd34

This issue has been found by https://www.cubic.dev/codebase-scan/7b15eebd-abc2-4604-9523-7f9bec5f67f6?violationId=5a8fb3b7-1086-5d11-bb06-1f0864bd56ff
2026-02-10 15:05:08 +01:00
Aliaksandr Valialkin
f68771b3c2 lib/protoparser/protoparserutil: re-use byte buffers in readUncompressedData() with the capacity up to 1MiB
The expected size of the data ingestion request body accepted by VictoriaMetrics / VictoriaLogs / VictoriaTraces
exceeds 64KiB, and is close to 1MiB. That's why it is better to re-use byte buffers with capacities up to 1MiB,
even if less than 25% of their capacity was used the last time.

This should reduce the number of GC cycles at high data ingestion rate when the request body sizes
are distributed at both sides of the 16KiB ... 64KiB range.
This is a follow-up for 09d2ce36e8

Updates https://github.com/VictoriaMetrics/VictoriaLogs/issues/1042
2026-02-10 13:06:34 +01:00
Fred Navruzov
8fe9b5e5a0 docs/vmanomaly: fix iframe params (#10421)
### Describe Your Changes

fix iframe params in embedded playgrounds on /anomaly-detection/ui/ ,
anomaly-detection/quickstart/ pages

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-10 12:42:15 +02:00
Hui Wang
95e939f258 app/vmselect: properly count vm_deduplicated_samples_total{type="select"} metric
Previously `vm_deduplicated_samples_total{type="select"}` didn't take into account identical samples.

This commit takes it into account in the same way as `vm_deduplicated_samples_total{type="merge"}` metric.

Related to  https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10384.
2026-02-10 10:27:58 +01:00
Roman Khavronenko
422df65b40 docs: update metadata API reference across the docs
* mention support of multitenancy in metadata
* add a basic alerting rule for tracking cache utilization
* clarify cleanup policy of metadata cache
2026-02-10 10:27:58 +01:00
Roman Khavronenko
62a8f1e64b app/vmagent: clarify global nature of remoteWrite.label cmd-line flag
Before, by mistake, -remoteWrite.label flag was referenced in one part
of the doc as per-remoteWrite-url flag. In fact, -remoteWrite.label is
global and applies labels to all remoteWrite URLs unconditionally.

This commit tries to clarify it in docs:
* update the life-of-a-sample diagram to change the labels applying
logic
* add hint how to add a label via `extra_label`
* removes duplicated description for -remoteWrite.label flag

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10373
2026-02-10 10:27:58 +01:00
Pablo (Tomas) Fernandez
df979b551b Update guide/k8s-monitoring-via-vm-single (#10372)
This is the first PR on a proposed series of updates to the guides.

I started with this one because:

It's on the top ten guides according to Google Analytics
It's a good starting point for me to get familiar with VM on Kubernetes
I plan to work through the rest of the guides in the following days
(coordinating the effort with JJ).

Changelog for this guide:

- Updated GKE version to a more current 1.34+
- Updated guide to more modern Helm and Kubectl versions
- Tested updated instructions on GKE 1.34.1-gke.3971001 (and a local k3s
instance) successfully
- Removed revision from Grafana values for helm chart (confirmed it
pulls the latest revision)
- Split the helm chart values into more readable chunks and added
explanations next to each chunk
- Added and updated expected outputs. Some were missing and others were
outdated
- Updated Grafana dashboards screenshots since they changed from the
last revision
- Updated Grafana repo to use community org (old grafana chart was
deprecated
on Jan 30th -
[source](https://community.grafana.com/t/helm-repository-migration-grafana-community-charts/160983))
- Minor corrections and typo fixes
- Added a section at the end pointing readers where they can go next.
2026-02-10 10:27:57 +01:00
f41gh7
9c22d25ed2 vmselect: handle NaN values when merging blocks
`vmselect` merges samples from multiple replicas using an optimistic
deduplication path.

c7f52992e7/app/vmselect/netstorage/netstorage.go (L593-L595)

This is useful when `replicationFactor > 1`. However, identical series
containing NaN values from different replicas are treated as different
(due to `NaN != NaN`), forcing the slower fallback path unnecessarily.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10384
2026-02-10 10:27:51 +01:00
Max Kotliar
6e3193bfc6 dashboards: add source code data link to logging rate panel (#10406)
### Describe Your Changes

Add Source Code data link (link to bar or line in graph to see) that
points directly to a source code file on Github. `VictoriaMetrics -
cluster`, `VictoriaMetrics - single-node`, and `VictoriaMetrics -
vmagent` dashboards were updated. I did not add it to other panels since
they do not have Drilldown section at all.

Also, fixed a misplaced Drilldown link in `VictoriaMetrics -
single-node` dashboard.

Proxy service code is here
https://github.com/VictoriaMetrics/location2source/

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-10 10:26:30 +02:00
Fred Navruzov
c6973e6fe7 docs/vmanomaly: v1.28.6-1.28.7 (#10419)
### Describe Your Changes

- Updated docs to reflect v1.28.6-v1.28.7 changes
- Fixed typos and misaligned section content
- Embedded playgrounds into documentation (data querying, vmanomaly
experiment)

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-10 10:21:09 +02:00
Max Kotliar
d2329a3d1c dashboards: Rename "Concurrent flushes on disk" panel to "Concurrent inserts" (#10409)
### Describe Your Changes

The new title better aligns with the code of
[writeconcurrencylimiter](d9dabea303/lib/writeconcurrencylimiter/concurrencylimiter.go (L140)),
the panel description and the metric used in the query.

Previously, the panel title suggested that it reflected only disk write
performance. During an incident investigation, this led to a wrong
assumption that the panel was unrelated to client-side performance.

In reality, the metric [includes the full write
path](98e320842c/lib/vminsertapi/server.go (L263)):
time spent reading data from the TCP connection, processing it, and
acknowledging the block. The updated title reflects this behavior more
accurately and reduces the risk of misinterpretation during incident
analysis.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-02-09 19:42:18 +02:00
Aliaksandr Valialkin
06d214a0ea docs/victoriametrics/Single-server-VictoriaMetrics.md: add https://docs.victoriametrics.com/VictoriaMetrics.html seen in the wild according to the 404 pages report in Google Analytics 2026-02-09 17:00:23 +01:00
Aliaksandr Valialkin
3708fa3d24 docs/victoriametrics/Cluster-VictoriaMetrics.md: add https://docs.victoriametrics.com/Cluster-VictoriaMetrics/ alias seen in wild according to the 404 pages report in Google Analytics 2026-02-09 17:00:22 +01:00
Aliaksandr Valialkin
d0a1e63425 docs/victoriametrics/MetricsQL.md: add https://docs.victoriametrics.com/MetricsQL/ alias seen in wild according to the 404 pages report in Google Analytics 2026-02-09 17:00:22 +01:00
Jayice
234125f2fa app/vmselect: align graphite render API process timeout to query deadline
Previously the error returned on timeout suggested a memory leak, which
could confuse a user. In reality timeout could happen if vmselect is
overloaded or the query takes a lot of time to process. The commit
aligns rss.RunParallel with query deadline set either via flag
`-search.maxQueryDuration` or the `timeout` query argument. The logged
warn message is adjusted to suggest resource increase or timeout
increase.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8484

Signed-off-by: JAYICE <jayice.zhou@qq.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
2026-02-09 14:04:34 +02:00
Artem Fetishev
a169757765 docs/changelog: add known issue to v1.132.0 release notes
Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-09 11:06:43 +01:00
JAYICE
b8cf667ec3 app/vmagent: improve kafka consumer performance
Previously, the Kafka consumer in vmagent committed offsets per message
(manual commit). At high message rates, this could overload the commit
path (coordinator, __consumer_offsets topic, and network).

This commit introduces time-based manual commits with a controlled window:
* enable.auto.commit remains false by default.
* After a successful TryPush (data accepted into the buffer before the
  vmagent queue/backend), vmagent adds the message to pending offsets.
* Offsets are committed periodically (every second), as well as during
  shutdown and partition rebalance.

This keeps the commit point tied to TryPush (stronger guarantees than
auto-commit) while significantly reducing commit QPS.

Auto-commit is also time-based, but it advances offsets based on poll()
delivery rather than application-level processing. This means offsets
may be committed before data is actually accepted by the vmagent
pipeline, slightly increasing the risk of data loss on crash or restart.

This change does not make the Kafka consumer fully transactional
end-to-end. Buffers in vmagent/vminsert/vmstorage still imply possible
data loss on hard stops. However, it provides stronger guarantees than
auto-commit, since commits are based on TryPush rather than poll().

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10395
2026-02-06 13:17:19 +01:00
Aliaksandr Valialkin
7179178e22 docs/victoriametrics/integrations/zabbixconnector.md: add an alias - https://docs.victoriametrics.com/victoriametrics/integrations/zabbix/ - seen in the Internet
Visits to this page are seen in Google Analytics reports.
2026-02-05 23:53:01 +01:00
Aliaksandr Valialkin
cdf20fcc4f deployment/docker: update base Alpine Docker image from 3.23.2 to 3.23.3
See https://www.alpinelinux.org/posts/Alpine-3.20.9-3.21.6-3.22.3-3.23.3-released.html
2026-02-05 19:49:05 +01:00
Aliaksandr Valialkin
277823cc32 deployment/docker: update Go builder from Go1.25.6 to Go1.25.7
See https://github.com/golang/go/issues?q=milestone%3AGo1.25.7%20label%3ACherryPickApproved
2026-02-05 19:47:13 +01:00
Aliaksandr Valialkin
98e320842c docs/victoriametrics: add links on how to tune VictoriaMetrics for IoT and industrial monitoring cases with low churn rate for time series
The link is https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#index-tuning-for-low-churn-rate
Put this link to the docs which mention IoT and industrial monitoring, so users could figure out
how to optimize VictoriaMetrics for these cases.
2026-02-05 17:24:19 +01:00
Aliaksandr Valialkin
3f4c3e96fe lib/protoparser/protoparserutil: do not store byte slices with more than 75% of unused space in the pool
Keeping such byte slices in the pool may increase memory usage when processing a small share of requests
with much bigger sizes than the average processed request.

This should help reducing memory usage at https://github.com/VictoriaMetrics/VictoriaLogs/issues/1042
2026-02-04 15:31:56 +01:00
Max Kotliar
37fbe7ecac docs: update changelog with LTS release notes 2026-02-02 18:46:01 +02:00
Max Kotliar
2a37b9472d docs: bump version to v1.135.0 2026-02-02 18:38:37 +02:00
Max Kotliar
a9fd74f8de deployment/docker: bump version to v1.135.0 2026-02-02 18:28:46 +02:00
f41gh7
728fa1479d follow-up for 60cadfbad1
Respect the default value of http.DefaultTransport.Proxy. Previously,
it could be unintentionally overridden with a nil value.

This commit aligns Proxy configuration across all created transports.
2026-02-02 16:39:59 +01:00
Zane DeGraffenried
22fe0af915 lib/promauth: fix oauth http client overwriting default proxy with nil
Previously, default `Proxy` was unconditionally replaced with config value, which could be nil. 
It made impossible to use  default http client proxy env variables.

This commit adds check in oauth http client builder that only overwrites the
transport proxy if a custom proxy url function is defined.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10385
2026-02-02 15:41:24 +01:00
Vadim Alekseev
8b56d1614d app/vminsert/common: reduce allocations when writing metadata
Bug was introduced at 5a587f2006, while porting change from cluster branch.

This commit properly slices the `mms []metricsmetadata.Row` slice.
Previously, every WriteMetadata call triggered a
slice allocation.
This shouldn't significantly impact overall performance, so I haven't
included benchmarks.

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10392
2026-02-02 15:41:16 +01:00
Nikolay
ced4506d9e lib/promscrape: properly expose kubernetes_sd dialer metrics (#10381)
Commit 35b31f904d introduced a bug, where
dialer metrics for Kubernetes discovery were overwritten.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10382
2026-02-02 14:51:46 +01:00
Nikolay
ffb696a44c docs: mention downsampling export API behavior
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10326
2026-02-02 14:51:45 +01:00
Artem Fetishev
8f80fb601d lib/storage: reduce number of shards in metricIDCache
This should reduce cpu utilization while still removing the storage connection saturation.

Follow-up for 6bc809813b (#10346)

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-01 18:16:07 +01:00
Max Kotliar
acffe5c11e docs/changelog: cut v1.135.0 2026-01-30 14:12:31 +02:00
Max Kotliar
ab9d65e905 docs: run make docs-update-flags 2026-01-30 14:10:10 +02:00
Max Kotliar
077e270eff docs: run make docs-update-flags 2026-01-30 14:05:02 +02:00
Max Kotliar
cbe0c6cf8b app/vmselect: run make vmui-update 2026-01-30 13:59:13 +02:00
Zakhar Bessarab
49ddd228bf lib/pushmetrics: allow enabling push metrics via config
This is needed in order to allow using lib/pushmetrics for vmctl as it does not use go native flags.

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

app/vmctl: add metrics for the migrations

- add flags to allow setting up metrics push
- add metrics to track progress of the migration for all modes
- add metrics for generic backoff and limiter packages

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2026-01-30 13:06:51 +02:00
f41gh7
cf820cfffb follow-up for a2271284
Remove duplicate line at app/vmui/Makefile
2026-01-30 11:29:24 +01:00
Andrei Baidarov
4b4c330ff5 lib/storage: shard metricIdCache
The current implementation has a bottleneck – a single mutex to access
`prev`/`next` metric sets. Each rotation results in storage utilization
spikes since lock-free `curr` is almost empty, and cache needs to
promote metrics from `prev` to `next`.

This is an attempt to reduce contention by splitting cache into separate
shards.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10367
2026-01-30 11:20:51 +01:00
Hui Wang
91f8bbc3d4 app/vmalert: do not skip sending alert notifications to -notifier.url if remote write requests fail
Note: remote write request won't fail immediately if `-remoteWrite.url`
is unreachable, as vmalert maintains a remote write queue (with capacity
controlled by `-remoteWrite.maxQueueSize(default 1e5)`) and uses a
separate process to batch and push queued data.

vmalert uses error group to print error messages associated with a
single group together, which should assist the group owner in reviewing
relevant error messages.
With this pull request, the error message would be like:
```
2026-01-30T08:26:46.641Z	error	app/vmalert/rule/group.go:395	group "group2": errors(3): 
rule "rule1": remote write failure: failed to push timeseries - queue is full (1 entries). Queue size is controlled by -remoteWrite.maxQueueSize flag
rule "rule1": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-1/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-1/api/v2/alerts"; response body: 
rule "rule1": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-2/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-2/api/v2/alerts"; response body: 
2026-01-30T08:26:46.641Z	error	app/vmalert/rule/group.go:395	group "group2": errors(3): 
rule "rule2": remote write failure: failed to push timeseries - queue is full (1 entries). Queue size is controlled by -remoteWrite.maxQueueSize flag
rule "rule2": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-2/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-2/api/v2/alerts"; response body: 
rule "rule2": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-1/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-1/api/v2/alerts"; response body: 
2026-01-30T08:26:52.229Z	error	app/vmalert/rule/group.go:395	group "group1": errors(3): 
rule "rule1": remote write failure: failed to push timeseries - queue is full (1 entries). Queue size is controlled by -remoteWrite.maxQueueSize flag
rule "rule1": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-1/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-1/api/v2/alerts"; response body: 
rule "rule1": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-2/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-2/api/v2/alerts"; response body: 
2026-01-30T08:26:52.229Z	error	app/vmalert/rule/group.go:395	group "group1": errors(3): 
rule "rule2": remote write failure: failed to push timeseries - queue is full (1 entries). Queue size is controlled by -remoteWrite.maxQueueSize flag
rule "rule2": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-2/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-2/api/v2/alerts"; response body: 
rule "rule2": notifier failure: failed to send alerts to addr "http://non-existing-alertmanager-1/api/v2/alerts": invalid SC 502 from "http://non-existing-alertmanager-1/api/v2/alerts"; response body: 
```

Related PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10376
2026-01-30 11:20:50 +01:00
JAYICE
1076cc54c0 expose topN average memory bytes consumption queries in /api/v1/status/top_queries (#10350)
part of https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9330

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

---------

Signed-off-by: JAYICE <1185430411@qq.com>
2026-01-30 10:58:30 +02:00
Zakhar Bessarab
3e62bbbb40 app/vmbackupmanager: allow disabling scheduled backups
This commit adds a new flag `disableScheduledBackups` for `vmbackupmanager`, which disables any scheduled backups. It could be useful to keep vmbackupmanager running and serving API calls only.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10364
2026-01-29 13:45:54 +01:00
Vadim Rutkovsky
7838f85df6 docs: update examples and documentation after nodes/proxy permission removed
Updated helm-charts and operators no longer come with nodes/proxy
permissions for vmagent/vmsingle roles. In the examples using kubelet's
proxy endpoint we should explicitly create ClusterRoles /
ClusterRoleBinding to grant access.

See https://github.com/VictoriaMetrics/operator/pull/1754 and
https://github.com/VictoriaMetrics/helm-charts/pull/2676

Ref: https://github.com/VictoriaMetrics/operator/issues/1753
2026-01-29 13:37:14 +01:00
Hui Wang
0251e902ad app/vmalert: ensure alert restore retrieve the correct previous alert state if the group takes long time to evaluate
The new `ALERTS_FOR_STATE` may be retrieved during restore when:
1. a group contains multiple heavy rules, alerting rule A may have
already been executed and its state metrics successfully uploaded to the
datasource by the time all rules within the group have finished
executing;
2. the datasource makes data queryable very quickly, for instance, when
users configure a small value for `-search.latencyOffset`.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
2026-01-29 13:37:13 +01:00
Hui Wang
e8fdf05cfc vmalert: disallow setting the -notifier.url command-line flag to a null value
Previously, running vmalert with an empty notifier.url did not produce an error and led to a vmalert instance that would never send a notification successfully.

This commit properly validates an empty notifier.url value.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10355
2026-01-28 14:10:11 +01:00
Artem Fetishev
932a47acfe lib/storage: follow-up for making searchAndMerge profile-friendly
Follow-up for c705da74f6
2026-01-28 14:10:11 +01:00
Hui Wang
722586c623 app/vmselect/promql: fix gaps at changes() functions
After changing the scrape interval from a smaller value (e.g., 30s) to a larger value (e.g., 60s), the changes() function starts to yield non-zero values even when the underlying values have not changed.

 This commit keeps unchanged series values when a large gap occurs between samples or when the scrape interval decreases.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10280
2026-01-28 14:10:10 +01:00
Max Kotliar
611e09a421 docs: Update vmctl flags in docs with a command (#10357)
### Describe Your Changes

The commit extends make docs-update-flags command so it updates vmctl
flags as well. It creates one md file with global flags and several
files per supported mode.


### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-01-28 14:13:20 +02:00
Max Kotliar
c4c9ac9e6b docs: add available_from to request body buffering vmauth doc
Follow-up for
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10310 and
e31abfc25c
2026-01-28 12:51:00 +02:00
Nikolay
98c378089d lib/fs: properly check for partially deleted directories (#10342)
Commit 83da33d8cf introduced a check to
detect directories partially removed via IsPartiallyRemovedDir.

However, the check was performed using the full path, while de.Name()
returns only the current entry name (without the path). As a result, the
check always succeeded and the function did not behave as intended.
2026-01-28 10:45:22 +01:00
Nikolay
8baa370ad9 lib/fs: add fs.disableMincore flag
This flag allows disabling the mincore() syscall introduced in
50fc48ac47. On older ZFS filesystems,
mincore() may trigger a bug related to ZFS's own in-memory cache. Mixing
reads from mmap()ed files and direct disk reads can corrupt the ZFS ARC
cache and lead to data read corruption.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327
2026-01-27 20:38:31 +01:00
Nikolay
e6ac25d79d lib/storage: properly aggregate per IndexedDB cache stats
Commit f62893c151 added an attempt to fix
stats for `tagFiltersCache`, `metricIDCache`, and `dateMetricIDCache`.
Instead of aggregated stats, it returned the largest cache stats by
cache size.

This resulted in possible counter decreases for counter metric types. It
made aggregated metrics less usable.

This commit changes cache stats aggregation by metric type:
* size-related gauge metrics are returned based on max cache size usage
* metric counters are reported as a sum of all counters

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10275
2026-01-27 20:38:31 +01:00
Artem Fetishev
3e3d9959bc lib/storage: make pt and legacy idbs visible in golang profiles
Rewrite the searchAndMerge so that golang profiles could show exactly
how many resources are consumed by each idb type.
2026-01-27 20:38:30 +01:00
Aliaksandr Valialkin
2606aa6e2f lib/{mergeset,storage}: add a comment explaining why the strange construct with anonymous function is needed
This is a follow-up for the commit 2a0e382a99

Updates https://github.com/VictoriaMetrics/VictoriaLogs/issues/1020
2026-01-27 19:46:20 +01:00
Jiekun
1ac14e69dc chore: add build version information to the home page for consistency with other projects
The build version added to:
- victoria-metrics
- vmagent
- vmalert

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10249

Co-authored-by: Hui Wang <haley@victoriametrics.com>
Signed-off-by: Zhu Jiekun <jiekun@victoriametrics.com>
2026-01-27 18:37:24 +02:00
Jayice
286ea13aa5 introduce new alert for vmagent persistentqueue capacity 2026-01-27 18:18:49 +02:00
Aliaksandr Valialkin
6ddbd2242a docs/victoriametrics/README.md: remove obsolete docs about staleness markers during deduplication after the commit 7bd5d19f62
Staleness markers are ignored on the deduplication interval if other numeric samples exist on that interval.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5587
2026-01-27 16:09:05 +01:00
Aliaksandr Valialkin
f2395bb807 lib/storage/dedup.go: remove obsolete comment from DeduplicateSamples - it doesn't keep stale NaNs on purpose after the commit 7bd5d19f62
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5587
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
2026-01-27 16:09:04 +01:00
Max Kotliar
2959322d36 docs/changelog: fix ordering after merging pr.
related pr https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10320
2026-01-27 16:38:54 +02:00
Jayice
cf93c70fba lib/promscrape: apply promscrape.maxScrapeSize to decompressed data
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9481
2026-01-27 16:38:54 +02:00
Artur Minchukou
bd8c5e9d1f app/vmui: fix build of vmui by handling playground env variable correctly (#10354)
### Describe Your Changes

Fixed build of vmui by handling playground env variable correctly.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-01-27 16:25:00 +02:00
Yury Moladau
dd765cf3ce vmui: fix "Percentage from total" for multiple metrics in Cardinality Explorer (#10323)
### Describe Your Changes

In the Cardinality Explorer, when filtering, a "Percentage from total"
stat appears. This stat is documented as "the share of these series in
the total number of time series".

This works for pages for individual metrics. However, if using a filter
that returns *multiple* metrics, the value of "Percentage from total"
will only account for the size of the *first* metric. One can have a
filter that returns, say, 10k time series (out of, say, 100k in the VM
cluster), and if the first metric returned has 1k time series, then
"Percentage from total" will show 1%, not 10%.

This PR fixes that calculation.

Credits to @PleasingFungus for the original fix (PR #10288).

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).

Signed-off-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
Co-authored-by: PleasingFungus <PleasingFungus@users.noreply.github.com>
2026-01-27 15:38:30 +02:00
Artur Minchukou
22adfdc680 app/vmui: move node from ci to docker and update build steps (#10299)
### Describe Your Changes

Moved node from CI to make command and update build steps.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [x] My change adheres to [VictoriaMetrics development
goals](https://docs.victoriametrics.com/victoriametrics/goals/).
2026-01-27 15:24:04 +02:00
Nikolay
2dc2229123 follow-up after e35a9a366c
Commit e35a9a366c changed the order of wg.Add calls in the Graphite transform package. Previously, all wg.Add calls were made upfront, but after that change it became possible for wg.Wait to exit earlier than expected.

This commit fixes the issue by spawning all background goroutines first and starting the goroutine that calls wg.Wait afterward.
2026-01-27 13:50:43 +01:00
Aliaksandr Valialkin
c8c3cf472a all: consistently use sync.WaitGroup.Go() instead of sync.WaitGroup.Add(1) + sync.WaitGroup.Done()
This improves code readability a bit.
2026-01-27 00:46:28 +01:00
JAYICE
ecf9eb454c app/vmagent: support configuring different -remoteWrite-queues per url
Previously vmagent had remoteWrite.queues as a global setting that was applied to every persistentqueue. However, it could be useful to specify remoteWrite.queues per remoteWrite.url.

Each remote write destination might have a different workload (latency, throughput, and availability), so tuning is more flexible if remoteWrite.queues can be set separately for a specific destination.

This commit makes `-remoteWrite.queues` configurable per remoteWrite.url.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10270
2026-01-26 20:14:08 +01:00
Max Kotliar
1d2c317320 docs/changelog: chore changelog
- rename `these docs` link to a more explicit link
- Add thank you for contribution.
2026-01-26 18:45:24 +02:00
Max Kotliar
79c17e30c9 docs: run make docs-update-flags 2026-01-26 18:45:24 +02:00
Zhu Jiekun
39477438cf lib/promscrape: ceiling the last scrape size
Round up the last scrape size to an integer number of bytes or kilobytes to
avoid misleading dots.

fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10307
2026-01-26 12:47:21 +01:00
Max Kotliar
bdba3c81e9 app/vmauth: fix backend healthcheck for url prefixes defined inside url_map
Previously health checks for url prefixes defined inside `url_map` were
not properly stopped. See STR in
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10334#issuecomment-3791401822

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10334
2026-01-26 11:50:39 +01:00
Hangjie Mo
610b328e5a lib/storage: properly search searchTenantsOnDate
The initial implementation of searchTenantsOnDate used an index scan for the given prefix (index prefix + tenant + date).
It did not check whether the date prefix was actually outside the current date.

This commit adds the missing date check and makes the tenant search results accurate.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10295
2026-01-26 11:34:19 +01:00
Aliaksandr Valialkin
8c884f8117 app/vmauth: allow buffering request body before proxying it to the backend
This should help reducing load on backends when many concurrent clients
send requests over slow networks (for example, when many IoT devices send metrics
to vmauth over slow connections).

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10309

This commit is based on top of https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10310
Thanks to @makasim for the initial idea.
2026-01-26 03:14:23 +01:00
Aliaksandr Valialkin
f4a9633eb2 app/vmauth: properly increment vmauth_user_concurrent_requests_limit_reached_total and vmauth_unauthorized_user_concurrent_requests_limit_reached_total metrics when the request is rejected because of the concurrency limit
These metrics must be incremented when the request couldn't be processed because of the configured per-user concurrency limit.
The commit 76176ac1d3 moved the counter increase to the place when the current request
is put in the wait queue because the concurrency limit is reached. This is incorrect, since such requests
can still be successfully processed during -maxQueueDuration . This also contradicts the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting

There is little practical sense in counting the number of times the concurrency limit is reached,
while the request is successfully processed during the -maxQueueDuration after that.

Add missing alerting rule for rejected unauthorized requests because of the concurrency limit.

Add missing grouping by instance for per-user counter of rejected queries because of the concurrency limit.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
2026-01-25 21:45:00 +01:00
Aliaksandr Valialkin
7cfc3b2527 app/vmauth: put comments into the correct places after the commit 5f67f04f6b 2026-01-25 21:45:00 +01:00
Aliaksandr Valialkin
695937816c vendor: update github.com/VictoriaMetrics/VictoriaLogs from v1.36.2-0.20251008164716-21c0fb3de84d to v0.0.0-20260125191521-bc89d84cd61d 2026-01-25 20:25:17 +01:00
Aliaksandr Valialkin
da1e7af054 LICENSE: update the end copyright year from 2025 to 2026 2026-01-25 20:14:35 +01:00
Aliaksandr Valialkin
4f1270c7f2 lib/logger: count both printed and suppressed logs at vm_log_messages_total metric
This simplifies troubleshooting by investigating the vm_log_messages_total metric
when logs are unavailable. The logs may be unavailable when the -loggerLevel command-line
flag is set to value other than INFO. The logs may be unavailable when clients
use Monitoring of Monitoring service ( https://victoriametrics.com/products/mom/ ),
which provides metrics, but doesn't provide logs from VictoriaMetrics components
running at the client side.

Add `is_printed` label to the `vm_log_messages_total` metric in order to detect whether
the given log has been suppressed or printed.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10304

While at it, make more readable the description for the TooManyLogs alert,
which is based on the vm_log_messages_total metric.
Also return back the `level!="info"` instead of `level="error"` filter
in the query for this alerting rule, in order to be consistent with queries
at the official dashboards for VictoriaMetrics components.
TODO: investigate too high warnings rate at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2760
and fix it at the source of these warnings instead of modifying the query
for the TooManyLogs alert.
2026-01-25 17:50:51 +01:00
2760 changed files with 219322 additions and 118224 deletions

0
.codex Normal file
View File

View File

@@ -1,23 +0,0 @@
# Project Overview
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
## Folder Structure
- `/app`: Contains the compilable binaries.
- `/lib`: Contains the golang reusable libraries
- `/docs/victoriametrics`: Contains documentation for the project.
- `/apptest/tests`: Contains integration tests.
## Libraries and Frameworks
- Backend: Golang, no framework. Use third-party libraries sparingly.
- Frontend: React.
## Code review guidelines
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
Chore-only changes may be omitted from the changelog.

View File

@@ -4,6 +4,8 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "gomod"
directory: "/"
schedule:
@@ -23,6 +25,8 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "npm"
directory: "/app/vmui/packages/vmui"
schedule:

View File

@@ -1,10 +1,3 @@
### Describe Your Changes
**PLEASE REMOVE LINE BELOW BEFORE SUBMITTING**
Please provide a brief description of the changes you made. Be as specific as possible to help others understand the purpose and impact of your modifications.
### Checklist
The following checks are **mandatory**:
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development goals](https://docs.victoriametrics.com/victoriametrics/goals/).
Before creating the PR, make sure you have read and followed the [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).

View File

@@ -53,19 +53,24 @@ jobs:
arch: amd64
- os: openbsd
arch: amd64
- os: netbsd
arch: amd64
- os: windows
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version: stable
go-version-file: 'go.mod'
- run: go version
- name: Build vmcluster for ${{ matrix.os }}-${{ matrix.arch }}
run: make vmcluster-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -9,7 +9,7 @@ jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# needed for proper diff
fetch-depth: 0

View File

@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0 # we need full history for commit verification
@@ -27,11 +27,21 @@ jobs:
exit 0
fi
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
# Check raw commit objects for a "gpgsig" header as a fast early signal for
# contributors. Both GPG and SSH signatures use this header.
# This avoids relying on %G? which returns N for SSH commits.
# This check is not a security enforcement — unsigned commits cannot be merged
# anyway due to the GitHub repository merge policy.
unsigned=""
for sha in $(git rev-list $RANGE); do
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
unsigned="$unsigned $sha"
fi
done
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (G or E)"
echo "All commits in PR are signed (GPG or SSH)"

View File

@@ -15,24 +15,26 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@master
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version: stable
go-version-file: 'go.mod'
cache: false
- run: go version
- name: Cache Go artifacts
uses: actions/cache@v4
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/go-build
~/go/pkg/mod
~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
- name: Check License
run: make check-licenses

View File

@@ -29,34 +29,35 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache: false
go-version: stable
go-version-file: 'go.mod'
- run: go version
- name: Cache Go artifacts
uses: actions/cache@v4
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/go-build
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
languages: go
- name: Autobuild
uses: github/codeql-action/autobuild@v4
uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
category: 'language:go'

View File

@@ -16,19 +16,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}
path: __vm-docs
- name: Import GPG key
uses: crazy-max/ghaction-import-gpg@v6
uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0
id: import-gpg
with:
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}

View File

@@ -32,26 +32,27 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version: stable
go-version-file: 'go.mod'
- run: go version
- name: Cache golangci-lint
uses: actions/cache@v4
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cache/golangci-lint
~/go/bin
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all
run: |
@@ -65,49 +66,46 @@ jobs:
strategy:
matrix:
scenario:
- 'test-full'
- 'test-full-386'
- 'test'
- 'test-386'
- 'test-pure'
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version: stable
go-version-file: 'go.mod'
- run: go version
- name: Run tests
run: GOGC=10 make ${{ matrix.scenario}}
run: make ${{ matrix.scenario}}
- name: Publish coverage
uses: codecov/codecov-action@v5
with:
files: ./coverage.txt
integration:
name: integration
runs-on: ubuntu-latest
apptest:
name: apptest
runs-on: apptest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version: stable
go-version-file: 'go.mod'
- run: go version
- name: Run integration tests
run: make integration-test
- name: Run app tests
run: make apptest

View File

@@ -32,38 +32,44 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Node
uses: actions/setup-node@v6
- name: Cache node_modules
id: cache
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
node-version: '24.x'
path: app/vmui/packages/vmui/node_modules
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
- name: Cache node-modules
uses: actions/cache@v4
with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Install dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: make vmui-install
- name: Run lint
id: lint
run: make vmui-lint
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests
id: test
run: make vmui-test
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck
id: typecheck
run: make vmui-typecheck
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@v3
uses: ataylorme/eslint-annotate-action@d57a1193d4c59cbfbf3f86c271f42612f9dbd9e9 # 3.0.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
report-json: app/vmui/packages/vmui/vmui-lint-report.json

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2025 VictoriaMetrics, Inc.
Copyright 2019-2026 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.7.2
GOLANGCI_LINT_VERSION := 2.9.0
.PHONY: $(MAKECMDGOALS)
@@ -251,7 +251,7 @@ fmt:
gofmt -l -w -s ./apptest
vet:
GOEXPERIMENT=synctest go vet ./lib/...
go vet -tags 'synctest' ./lib/...
go vet ./app/...
go vet ./apptest/...
@@ -260,28 +260,28 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck
test:
GOEXPERIMENT=synctest go test ./lib/... ./app/...
go test -tags 'synctest' ./lib/... ./app/...
test-race:
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
go test -tags 'synctest' -race ./lib/... ./app/...
test-386:
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
test-pure:
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
test-full:
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
apptest:
$(MAKE) all vmctl vmbackup vmrestore
$(MAKE) vminsert-race vmselect-race vmstorage-race vmagent-race vmctl-race vmbackup-race vmrestore-race
go test ./apptest/... -skip="^Test(Single|Legacy).*"
integration-test-legacy: all vmbackup vmrestore
apptest-legacy: vminsert-race vmselect-race vmstorage-race vmbackup-race vmrestore-race
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
@@ -298,17 +298,17 @@ integration-test-legacy: all vmbackup vmrestore
go test ./apptest/tests -run="^TestLegacyCluster.*"
benchmark:
GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -bench=. ./app/...
go test -run=NO_TESTS -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/...
benchmark-pure:
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -bench=. ./app/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
vendor-update:
go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.24
go mod tidy -compat=1.26
go mod vendor
app-local:
@@ -332,7 +332,7 @@ install-qtc:
golangci-lint: install-golangci-lint
GOEXPERIMENT=synctest golangci-lint run
golangci-lint run --build-tags 'synctest'
install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)

View File

@@ -1,12 +1,11 @@
# VictoriaMetrics
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)](https://hub.docker.com/u/victoriametrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)
[![Join Slack](https://img.shields.io/badge/Join%20Slack-4A154B?logo=slack)](https://slack.victoriametrics.com)
[![X](https://img.shields.io/twitter/follow/VictoriaMetrics?style=flat&label=Follow&color=black&logo=x&labelColor=black&link=https%3A%2F%2Fx.com%2FVictoriaMetrics)](https://x.com/VictoriaMetrics/)
[![Reddit](https://img.shields.io/reddit/subreddit-subscribers/VictoriaMetrics?style=flat&label=Join&labelColor=red&logoColor=white&logo=reddit&link=https%3A%2F%2Fwww.reddit.com%2Fr%2FVictoriaMetrics)](https://www.reddit.com/r/VictoriaMetrics/)
@@ -16,16 +15,21 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture>
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics:
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** We have achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -1,17 +1,4 @@
# Security Policy
## Supported Versions
You can find out about our security policy and VictoriaMetrics version support on the [security page](https://docs.victoriametrics.com/victoriametrics/#security) in the documentation.
The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported |
|--------------------------------------------------------------------------------|--------------------|
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details.
## Reporting a Vulnerability
Please report any security issues to <security@victoriametrics.com>

View File

@@ -49,6 +49,11 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
Name: "__name__",
Value: m.Name,
})
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
labels = append(labels, prompb.Label{
Name: "host",
Value: sketch.Host,
})
for _, label := range m.Labels {
labels = append(labels, prompb.Label{
Name: label.Name,
@@ -57,9 +62,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
}
for _, tag := range sketch.Tags {
name, value := datadogutil.SplitTag(tag)
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompb.Label{
Name: name,
Value: value,

View File

@@ -83,6 +83,9 @@ var (
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented")
maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented")
maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented")
enableMultitenancyViaHeaders = flag.Bool("enableMultitenancyViaHeaders", false, "Enables multitenancy via HTTP headers. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy")
)
var (
@@ -216,7 +219,7 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
}
return func(req *http.Request) error {
path := strings.ReplaceAll(req.URL.Path, "//", "/")
at, err := getAuthTokenFromPath(path)
at, err := getAuthTokenFromPath(path, req.Header)
if err != nil {
return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err)
}
@@ -224,8 +227,15 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
}
}
func getAuthTokenFromPath(path string) (*auth.Token, error) {
p, err := httpserver.ParsePath(path)
// parsePath parses the multitenant request path.
//
// The request headers are consulted only when -enableMultitenancyViaHeaders is set;
// otherwise the path alone is parsed.
func parsePath(path string, header http.Header) (*httpserver.Path, error) {
	if !*enableMultitenancyViaHeaders {
		return httpserver.ParsePath(path)
	}
	return httpserver.ParsePathAndHeaders(path, header)
}
func getAuthTokenFromPath(path string, header http.Header) (*auth.Token, error) {
p, err := parsePath(path, header)
if err != nil {
return nil, fmt.Errorf("cannot parse multitenant path: %w", err)
}
@@ -245,6 +255,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -558,14 +569,15 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path string) bool {
p, err := httpserver.ParsePath(path)
p, err := parsePath(path, r.Header)
if err != nil {
// Cannot parse multitenant path. Skip it - probably it will be parsed later.
return false
}
if p.Prefix != "insert" {
httpserver.Errorf(w, r, `unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
return true
// processMultitenantRequest is called for all unmatched path variants,
// but we should try parsing only /insert prefixed to avoid catching all possible paths.
return false
}
at, err := auth.NewTokenPossibleMultitenant(p.AuthToken)
if err != nil {

View File

@@ -77,16 +77,6 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
var metadataTotal int
if prommetadata.IsEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
for i := range mms {
mm := &mms[i]
mm.AccountID = accountID
mm.ProjectID = projectID
}
}
ctx.WriteRequest.Metadata = mms
metadataTotal = len(mms)
}

View File

@@ -75,11 +75,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
Samples: samples[len(samples)-1:],
})
}
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
}
for i := range mms {
mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{
@@ -88,8 +83,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
Type: mm.Type,
// there is no unit in Prometheus exposition formats
AccountID: accountID,
ProjectID: projectID,
})
}
ctx.WriteRequest.Timeseries = tssDst

View File

@@ -72,11 +72,6 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
var metadataTotal int
if prommetadata.IsEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
}
for i := range mms {
mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{
@@ -85,8 +80,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
Type: mm.Type,
Unit: mm.Unit,
AccountID: accountID,
ProjectID: projectID,
AccountID: mm.AccountID,
ProjectID: mm.ProjectID,
})
}
ctx.WriteRequest.Metadata = mmsDst

View File

@@ -2,6 +2,7 @@ package remotewrite
import (
"bytes"
"context"
"errors"
"fmt"
"io"
@@ -13,6 +14,9 @@ import (
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@@ -21,10 +25,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
var (
@@ -59,6 +60,8 @@ var (
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
basicAuthUsernameFile = flagutil.NewArrayString("remoteWrite.basicAuth.usernameFile", "Optional path to basic auth username to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second")
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second")
@@ -202,14 +205,10 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(*queues)
return float64(concurrency)
})
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
for range concurrency {
c.wg.Go(c.runWorker)
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
}
@@ -227,12 +226,14 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
hdrs = strings.Split(headersValue, "^^")
}
username := basicAuthUsername.GetOptionalArg(argIdx)
usernameFile := basicAuthUsernameFile.GetOptionalArg(argIdx)
password := basicAuthPassword.GetOptionalArg(argIdx)
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
var basicAuthCfg *promauth.BasicAuthConfig
if username != "" || password != "" || passwordFile != "" {
if username != "" || usernameFile != "" || password != "" || passwordFile != "" {
basicAuthCfg = &promauth.BasicAuthConfig{
Username: username,
UsernameFile: usernameFile,
Password: promauth.NewSecret(password),
PasswordFile: passwordFile,
}
@@ -294,7 +295,7 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
accessKey := awsAccessKey.GetOptionalArg(argIdx)
secretKey := awsSecretKey.GetOptionalArg(argIdx)
service := awsService.GetOptionalArg(argIdx)
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service)
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "")
if err != nil {
return nil, err
}
@@ -330,15 +331,20 @@ func (c *client) runWorker() {
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
return
case <-c.stopCh:
// c must be stopped. Wait for a while in the hope the block will be sent.
graceDuration := 5 * time.Second
// c must be stopped. Wait up to 5 seconds for the in-flight request to complete.
// If it succeeds, drain the remaining in-memory queue before returning.
stopCtx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
select {
case ok := <-ch:
if !ok {
// Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
} else {
c.drainInMemoryQueue(stopCtx, block[:0])
}
case <-time.After(graceDuration):
case <-stopCtx.Done():
// Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
}
@@ -409,8 +415,7 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
// Otherwise, it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.Register(len(block))
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
retriesCount := 0
again:
@@ -419,19 +424,10 @@ again:
c.requestDuration.UpdateDuration(startTime)
if err != nil {
c.errorsCount.Inc()
retryDuration *= 2
if retryDuration > maxRetryDuration {
retryDuration = maxRetryDuration
}
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.sanitizedURL, err, retryDuration.Seconds())
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
len(block), c.sanitizedURL, err, bt.CurrentDelay())
if !bt.Wait(c.stopCh) {
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
@@ -480,7 +476,7 @@ again:
goto again
}
logger.Warnf("failed to repack zstd block (%s bytes) to snappy: %s; The block will be rejected. "+
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
zstdBlockLen, err)
}
@@ -497,7 +493,10 @@ again:
// Unexpected status code returned
retriesCount++
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
// retryAfterDuration has the highest priority duration
if retryAfterHeader > 0 {
bt.SetDelay(retryAfterHeader)
}
// Handle response
body, err := io.ReadAll(resp.Body)
@@ -506,44 +505,48 @@ again:
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
} else {
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
}
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
if !bt.Wait(c.stopCh) {
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
}
// drainInMemoryQueue reads blocks from c.fq via MustReadInMemoryBlock and sends them
// one by one until stopCtx is done, no more blocks are returned, or a send fails.
//
// block is used as a reusable buffer for reading the queued data.
// A block that could not be sent is written back to c.fq before returning.
func (c *client) drainInMemoryQueue(stopCtx context.Context, block []byte) {
	// Stop draining as soon as stopCtx is canceled or its deadline is exceeded.
	for stopCtx.Err() == nil {
		var found bool
		block, found = c.fq.MustReadInMemoryBlock(block[:0])
		if !found {
			// Either the in-memory queue has already been drained
			// or the persisted queue is in use.
			// In both cases fq is guaranteed to be empty.
			return
		}
		if len(block) == 0 {
			// Nothing to send for an empty data block.
			continue
		}
		// c.stopCh should be closed at this stage,
		// so sendBlock should not perform retries.
		if sent := c.sendBlock(block); !sent {
			// Return the unsent block to the queue.
			c.fq.MustWriteBlockIgnoreDisabledPQ(block)
			return
		}
	}
}
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
// getRetryDuration returns the delay to wait before the next retry.
//
// A positive retryAfterDuration takes precedence: it is returned with jitter added
// and is not capped by maxRetryDuration.
// Otherwise retryDuration is doubled and capped at maxRetryDuration.
//
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
	if retryAfterDuration > 0 {
		// The server-provided Retry-After value wins over the default backoff policy.
		return timeutil.AddJitterToDuration(retryAfterDuration)
	}
	// Default exponential backoff policy.
	return min(retryDuration*2, maxRetryDuration)
}
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
//
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
@@ -574,24 +577,20 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
}
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
// retryAfterString should be in either HTTP-date or a number of seconds.
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
if retryAfterString == "" {
return retryAfterDuration
//
// s should be in either HTTP-date or a number of seconds.
// It returns time.Duration(0) if s does not follow RFC 7231.
func parseRetryAfterHeader(s string) time.Duration {
if s == "" {
return 0
}
defer func() {
v := retryAfterDuration.Seconds()
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
}()
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
}
// Retry-After could be in seconds.
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
if seconds, err := strconv.Atoi(s); err == nil {
return time.Duration(seconds) * time.Second
}

View File

@@ -6,66 +6,12 @@ import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
)
// TestCalculateRetryDuration verifies the retry delay returned by getRetryDuration
// for the default backoff policy and for an explicit retry-after duration.
func TestCalculateRetryDuration(t *testing.T) {
// f calls getRetryDuration n times with a one-minute max duration, feeding each result
// back in as retryDuration, and checks that the final result is
// 1. >= expectMinDuration minus one second
// 2. <= helper(expectMinDuration), the upper bound that accounts for the jitter
// added by timeutil.AddJitterToDuration
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
t.Helper()
for i := 0; i < n; i++ {
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
}
expectMaxDuration := helper(expectMinDuration)
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
t.Fatalf(
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
retryDuration.Milliseconds(),
)
}
}
// Call getRetryDuration once.
{
// default backoff policy
f(0, time.Second, 1, 2*time.Second)
// default backoff policy exceeds the max limit
f(0, 10*time.Minute, 1, time.Minute)
// retry after > default backoff policy
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
// retry after < default backoff policy
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
// retry after invalid and < default backoff policy
f(0, time.Second, 1, 2*time.Second)
}
// Call getRetryDuration multiple times.
{
// default backoff policy 2 times
f(0, time.Second, 2, 4*time.Second)
// default backoff policy 3 times
f(0, time.Second, 3, 8*time.Second)
// default backoff policy N times exceeds the max limit
f(0, time.Second, 10, time.Minute)
// retry after 120s, 1 time
f(120*time.Second, time.Second, 1, 120*time.Second)
// retry after 120s, 2 times
f(120*time.Second, time.Second, 2, 120*time.Second)
}
}
func TestParseRetryAfterHeader(t *testing.T) {
f := func(retryAfterString string, expectResult time.Duration) {
t.Helper()
@@ -91,11 +37,38 @@ func TestParseRetryAfterHeader(t *testing.T) {
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
}
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
func helper(d time.Duration) time.Duration {
dv := min(d/10, 10*time.Second)
func TestInitSecretFlags(t *testing.T) {
showRemoteWriteURLOrig := *showRemoteWriteURL
defer func() {
*showRemoteWriteURL = showRemoteWriteURLOrig
flagutil.UnregisterAllSecretFlags()
}()
return d + dv
flagutil.UnregisterAllSecretFlags()
*showRemoteWriteURL = false
InitSecretFlags()
if !flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("expecting remoteWrite.url to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to be secret")
}
flagutil.UnregisterAllSecretFlags()
*showRemoteWriteURL = true
InitSecretFlags()
if flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("remoteWrite.url must remain visible when -remoteWrite.showURL is set")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to remain secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to remain secret")
}
}
func TestRepackBlockFromZstdToSnappy(t *testing.T) {

View File

@@ -48,11 +48,7 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
ps.periodicFlusherWG.Go(ps.periodicFlusher)
return &ps
}
@@ -215,6 +211,9 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
dst.Type = src.Type
dst.Unit = src.Unit
dst.AccountID = src.AccountID
dst.ProjectID = src.ProjectID
// Pre-allocate memory for all string fields.
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
bufLen := len(wr.metadatabuf)

View File

@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest
for i := 0; i < seriesCount; i++ {
for i := range seriesCount {
var labels []prompb.Label
for j := 0; j < labelsCount; j++ {
for j := range labelsCount {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j),

View File

@@ -20,8 +20,7 @@ import (
)
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
@@ -39,7 +38,7 @@ var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
@@ -91,8 +90,8 @@ func WriteURLRelabelConfigData(w io.Writer) {
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig interface{} `yaml:"relabel_config"`
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
@@ -145,7 +144,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
}
var urlRelabelCfgs []interface{}
var urlRelabelCfgs []any
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
@@ -158,7 +157,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
}
rcs.perURL[i] = prc
var parsedCfg interface{}
var parsedCfg any
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}

View File

@@ -3,6 +3,7 @@ package remotewrite
import (
"flag"
"fmt"
"math"
"net/http"
"net/url"
"path/filepath"
@@ -11,6 +12,10 @@ import (
"sync/atomic"
"time"
"github.com/cespare/xxhash/v2"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -23,6 +28,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -30,8 +36,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
)
var (
@@ -59,7 +63,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -80,10 +84,14 @@ var (
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
`Enabled sorting for labels can slow down ingestion performance a bit`)
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
@@ -92,6 +100,8 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
"By default, metadata sending is controlled by the global -enableMetadata flag")
)
var (
@@ -141,6 +151,10 @@ func InitSecretFlags() {
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
flagutil.RegisterSecretFlag("remoteWrite.url")
}
// remoteWrite.proxyURL can contain authentication codes.
flagutil.RegisterSecretFlag("remoteWrite.proxyURL")
// remoteWrite.headers can contain auth headers such as Authorization and API keys.
flagutil.RegisterSecretFlag("remoteWrite.headers")
}
var (
@@ -157,8 +171,20 @@ func Init() {
if len(*remoteWriteURLs) == 0 {
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
}
if *maxHourlySeries > 0 {
hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
if *shardByURL && len(*disableOnDiskQueue) > 1 {
disableOnDiskQueues := *disableOnDiskQueue
firstValue := disableOnDiskQueues[0]
for _, v := range disableOnDiskQueues[1:] {
if firstValue != v {
logger.Fatalf("all -remoteWrite.url targets must have the same -remoteWrite.disableOnDiskQueue setting when -remoteWrite.shardByURL is enabled; " +
"either enable or disable -remoteWrite.disableOnDiskQueue for all targets")
}
}
}
if limit := getMaxHourlySeries(); limit > 0 {
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
return float64(hourlySeriesLimiter.MaxItems())
})
@@ -166,8 +192,8 @@ func Init() {
return float64(hourlySeriesLimiter.CurrentItems())
})
}
if *maxDailySeries > 0 {
dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
if limit := getMaxDailySeries(); limit > 0 {
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour)
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
return float64(dailySeriesLimiter.MaxItems())
})
@@ -176,13 +202,6 @@ func Init() {
})
}
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -215,9 +234,7 @@ func Init() {
dropDanglingQueues()
// Start config reloader.
configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
configReloaderWG.Go(func() {
for {
select {
case <-configReloaderStopCh:
@@ -227,7 +244,7 @@ func Init() {
reloadRelabelConfigs()
reloadStreamAggrConfigs()
}
}()
})
}
func dropDanglingQueues() {
@@ -267,17 +284,6 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty")
}
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" {
@@ -292,9 +298,10 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
rwctxIdx[i] = i
}
fs.RegisterPathFsMetrics(*tmpDataPath)
if *shardByURL {
consistentHashNodes := make([]string, 0, len(urls))
@@ -408,7 +415,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
// Push metadata separately from time series, since it doesn't need sharding,
// relabeling, stream aggregation, deduplication, etc.
if !tryPushMetadataToRemoteStorages(rwctxs, mms, forceDropSamplesOnFailure) {
if !tryPushMetadataToRemoteStorages(at, rwctxs, mms, forceDropSamplesOnFailure) {
return false
}
@@ -508,7 +515,9 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
//
// calculateHealthyRwctxIdx will rely on the order of rwctx to be in ascending order.
func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) ([]*remoteWriteCtx, bool) {
if !disableOnDiskQueueAny {
// When -remoteWrite.shardByURL=true always use all configured remote writes to preserve stable metrics distribution across shards.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10507
if !disableOnDiskQueueAny || *shardByURL {
return rwctxsGlobal, true
}
@@ -523,12 +532,6 @@ func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailu
return nil, false
}
rowsCount := getRowsCount(tss)
if *shardByURL {
// Todo: When shardByURL is enabled, the following metrics won't be 100% accurate. Because vmagent don't know
// which rwctx should data be pushed to yet. Let's consider the hashing algorithm fair and will distribute
// data to all rwctxs evenly.
rowsCount = rowsCount / len(rwctxsGlobal)
}
rwctx.rowsDroppedOnPushFailure.Add(rowsCount)
}
}
@@ -546,11 +549,18 @@ func pushTimeSeriesToRemoteStoragesTrackDropped(tss []prompb.TimeSeries) {
}
}
func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
if len(mms) == 0 {
// Nothing to push
return true
}
if at != nil {
for idx := range mms {
mm := &mms[idx]
mm.AccountID = at.AccountID
mm.ProjectID = at.ProjectID
}
}
// Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs.
// Since metadata is usually small and there is no guarantee that metadata can be sent to
// the same remote storage with the corresponding metrics.
@@ -558,11 +568,13 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
// Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.enableMetadata {
// Skip remote storage with disabled metadata
continue
}
wg.Go(func() {
if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure {
@@ -571,7 +583,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
}
anyPushFailed.Store(true)
}
}(rwctx)
})
}
wg.Wait()
return !anyPushFailed.Load()
@@ -603,15 +615,13 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
wg.Go(func() {
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
}(rwctx)
})
}
wg.Wait()
return !anyPushFailed.Load()
@@ -633,13 +643,11 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 {
continue
}
wg.Add(1)
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
wg.Go(func() {
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
}(rwctx, shard)
})
}
wg.Wait()
return !anyPushFailed.Load()
@@ -703,7 +711,7 @@ func shardAmountRemoteWriteCtx(tssBlock []prompb.TimeSeries, shards [][]prompb.T
}
tmpLabels.Labels = hashLabels
}
h := getLabelsHash(hashLabels)
h := getLabelsHashForShard(hashLabels)
// Get the rwctxIdx through consistent hashing and then map it to the index in shards.
// The rwctxIdx is not always equal to the shardIdx, for example, when some rwctx are not available.
@@ -794,11 +802,28 @@ var (
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
)
// getLabelsHashForShard returns the xxhash of the concatenated label names and values.
//
// It is kept separate from getLabelsHash on purpose: no '=' separator is written
// between a label name and its value, which preserves backward compatibility.
// Changing it would re-shard all series across remoteWrite targets.
func getLabelsHashForShard(labels []prompb.Label) uint64 {
	buf := labelsHashBufPool.Get()
	data := buf.B[:0]
	for i := range labels {
		data = append(data, labels[i].Name...)
		data = append(data, labels[i].Value...)
	}
	// Hand the (possibly grown) slice back to the pooled buffer before returning it.
	buf.B = data
	sum := xxhash.Sum64(data)
	labelsHashBufPool.Put(buf)
	return sum
}
func getLabelsHash(labels []prompb.Label) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
b = append(b, label.Name...)
b = append(b, '=')
b = append(b, label.Value...)
}
h := xxhash.Sum64(b)
@@ -837,6 +862,11 @@ type remoteWriteCtx struct {
streamAggrKeepInput bool
streamAggrDropInput bool
// enableMetadata indicates whether metadata should be sent to this remote storage.
// It is false when the per-URL -remoteWrite.disableMetadata flag is set for this URL,
// otherwise it follows the global -enableMetadata flag.
enableMetadata bool
pss []*pendingSeries
pssNextIdx atomic.Uint64
@@ -848,7 +878,19 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// isMetadataEnabledForURL reports whether metadata should be sent to the remote storage at argIdx.
//
// The per-URL -remoteWrite.disableMetadata flag takes precedence: when it is set for argIdx,
// metadata is disabled. Otherwise the global -enableMetadata flag decides.
func isMetadataEnabledForURL(argIdx int) bool {
	disabledForURL := disableMetadataPerURL.GetOptionalArg(argIdx)
	if !disabledForURL {
		// No explicit per-URL opt-out, so the global -enableMetadata value applies.
		return prommetadata.IsEnabled()
	}
	return false
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
@@ -863,6 +905,23 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
}
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is not much sense in keeping a higher number of blocks in memory,
// since this means that the producer outperforms the consumer and the queue
// will continue growing. It is better to store the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes())
@@ -880,16 +939,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
var c *client
switch remoteWriteURL.Scheme {
case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
}
c.init(argIdx, *queues, sanitizedURL)
c.init(argIdx, queuesSize, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx)
pssLen := *queues
pssLen := queuesSize
if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU.
@@ -901,10 +960,11 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
}
rwctx := &remoteWriteCtx{
idx: argIdx,
fq: fq,
c: c,
pss: pss,
idx: argIdx,
fq: fq,
c: c,
pss: pss,
enableMetadata: isMetadataEnabledForURL(argIdx),
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
@@ -1089,7 +1149,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}()
if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels in order to prevent
// Make a copy of tss before adding extra labels to prevent
// from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries)
@@ -1125,3 +1185,21 @@ func newMapFromStrings(a []string) map[string]struct{} {
}
return m
}
// getMaxHourlySeries returns the value of the maxHourlySeries flag as an int,
// capped at math.MaxInt32.
//
// The special value -1 and any value above math.MaxInt32 both yield math.MaxInt32.
func getMaxHourlySeries() int {
	n := *maxHourlySeries
	switch {
	case n == -1, n > math.MaxInt32:
		return math.MaxInt32
	}
	return int(n)
}
// getMaxDailySeries returns the value of the maxDailySeries flag as an int,
// capped at math.MaxInt32.
//
// The special value -1 and any value above math.MaxInt32 both yield math.MaxInt32.
func getMaxDailySeries() int {
	n := *maxDailySeries
	switch {
	case n == -1, n > math.MaxInt32:
		return math.MaxInt32
	}
	return int(n)
}

View File

@@ -25,15 +25,15 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
t.Helper()
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
itemsCount := 1_000 * bucketsCount
itemsCount := 10_000 * bucketsCount
m := make([]int, bucketsCount)
var labels []prompb.Label
for i := 0; i < itemsCount; i++ {
for i := range itemsCount {
labels = append(labels[:0], prompb.Label{
Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i),
})
for j := 0; j < 10; j++ {
for j := range 10 {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j),
@@ -44,10 +44,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
}
// Verify that the distribution is even
expectedItemsPerBucket := itemsCount / bucketsCount
expectedItemsPerBucket := float64(itemsCount / bucketsCount)
allowedDeviation := math.Round(float64(expectedItemsPerBucket) * 0.04)
for _, n := range m {
if math.Abs(1-float64(n)/float64(expectedItemsPerBucket)) > 0.04 {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want around %d", bucketsCount, n, expectedItemsPerBucket)
if math.Abs(expectedItemsPerBucket-float64(n)) > allowedDeviation {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want in range [%.0f, %.0f]",
bucketsCount, n, expectedItemsPerBucket-allowedDeviation, expectedItemsPerBucket+allowedDeviation)
}
}
}
@@ -248,7 +250,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
seriesCount := 100000
// build seriesCount (100000) series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := 0; i < seriesCount; i++ {
for i := range seriesCount {
tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{
{
@@ -269,7 +271,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
// build active time series set
nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := 0; i < remoteWriteCount; i++ {
for i := range remoteWriteCount {
nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{})
}

View File

@@ -81,12 +81,9 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.Interval.Duration() < 0 {
return fmt.Errorf("interval shouldn't be lower than 0")
}
if g.EvalOffset.Duration() < 0 {
return fmt.Errorf("eval_offset shouldn't be lower than 0")
}
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
if g.EvalOffset.Duration() > g.Interval.Duration() {
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
// if `eval_offset` is set, the group interval must be specified explicitly (instead of being inherited from the global evaluationInterval flag) and must not be smaller than the absolute value of the offset.
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
}
if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay")
@@ -225,6 +222,9 @@ func (r *Rule) Validate() error {
if r.Expr == "" {
return fmt.Errorf("expression can't be empty")
}
if _, ok := r.Labels["__name__"]; ok {
return fmt.Errorf("invalid rule label __name__")
}
return checkOverflow(r.XXX, "rule")
}

View File

@@ -136,6 +136,9 @@ func TestRuleValidate(t *testing.T) {
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
t.Fatalf("expected empty expr error")
}
if err := (&Rule{Record: "record", Expr: "sum(test)", Labels: map[string]string{"__name__": "test"}}).Validate(); err == nil {
t.Fatalf("invalid rule label; got %s", err)
}
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
t.Fatalf("expected valid rule; got %s", err)
}
@@ -176,11 +179,17 @@ func TestGroupValidate_Failure(t *testing.T) {
}, false, "interval shouldn't be lower than 0")
f(&Group{
Name: "wrong eval_offset",
Name: "too big eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
f(&Group{
Name: "too big negative eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(-2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{
Name: "wrong limit",

View File

@@ -2,6 +2,7 @@ package config
import (
"fmt"
"slices"
"strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
@@ -76,13 +77,12 @@ func (t *Type) ValidateExpr(expr string) error {
if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
fields, _ := q.GetStatsByFields()
for i := range fields {
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
// making the result meaningless and may lead to cardinality issues.
if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
labels, err := q.GetStatsLabels()
if err != nil {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
}
if slices.Contains(labels, "_time") {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"maps"
"net/http"
"net/url"
"strings"
@@ -91,9 +92,7 @@ func (c *Client) Clone() *Client {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
for k, v := range c.extraParams {
ns.extraParams[k] = v
}
maps.Copy(ns.extraParams, c.extraParams)
return ns
}

View File

@@ -34,7 +34,7 @@ type promResponse struct {
// Stats supported by VictoriaMetrics since v1.90
Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats,omitempty"`
} `json:"stats"`
// IsPartial supported by VictoriaMetrics
IsPartial *bool `json:"isPartial,omitempty"`
}

View File

@@ -772,7 +772,7 @@ func TestHeaders(t *testing.T) {
// basic auth
f(func() *Client {
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
@@ -817,7 +817,7 @@ func TestHeaders(t *testing.T) {
// custom header overrides basic auth
f(func() *Client {
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}

View File

@@ -87,6 +87,7 @@ func (m *Metric) DelLabel(key string) {
for i, l := range m.Labels {
if l.Name == key {
m.Labels = append(m.Labels[:i], m.Labels[i+1:]...)
break
}
}
}
@@ -134,7 +135,7 @@ func (ls Labels) String() string {
func LabelCompare(a, b Labels) int {
l := min(len(b), len(a))
for i := 0; i < l; i++ {
for i := range l {
if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name {
return -1

View File

@@ -27,6 +27,7 @@ var (
"Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
basicAuthUsernameFile = flag.String("datasource.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
basicAuthPasswordFile = flag.String("datasource.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -datasource.url")
@@ -63,6 +64,7 @@ func InitSecretFlags() {
if !*showDatasourceURL {
flagutil.RegisterSecretFlag("datasource.url")
}
flagutil.RegisterSecretFlag("datasource.headers")
}
// ShowDatasourceURL whether to show -datasource.url with sensitive information
@@ -105,7 +107,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
}
authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers))

View File

@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
b.Run("Instant std+fastjson", func(b *testing.B) {
for i := 0; i < b.N; i++ {
for range b.N {
var pi promInstant
err = pi.Unmarshal(data)
if err != nil {

View File

@@ -56,7 +56,7 @@ absolute path to all .tpl files in root.
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
`)
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
@@ -81,9 +81,7 @@ absolute path to all .tpl files in root.
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
var (
extURL *url.URL
)
var extURL *url.URL
func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -161,7 +159,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx)
if err != nil {
logger.Fatalf("failed to init: %s", err)
logger.Fatalf("failed to create manager: %s", err)
}
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)

View File

@@ -98,7 +98,7 @@ func (m *manager) close() {
m.wg.Wait()
}
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
id := g.GetID()
g.Init()
m.wg.Go(func() {
@@ -110,7 +110,6 @@ func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) e
})
m.groups[id] = g
return nil
}
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
@@ -119,7 +118,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
for _, cfg := range groupsCfg {
for _, r := range cfg.Rules {
if rrPresent && arPresent {
continue
break
}
if r.Record != "" {
rrPresent = true
@@ -162,10 +161,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
}
}
for _, ng := range groupsRegistry {
if err := m.startGroup(ctx, ng, restore); err != nil {
m.groupsMu.Unlock()
return err
}
m.startGroup(ctx, ng, restore)
}
m.groupsMu.Unlock()

View File

@@ -65,13 +65,11 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500
const iterations = 10
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
for range iterations {
rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected
@@ -79,7 +77,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
}
_ = m.update(context.Background(), cfg, false)
}
}(i)
})
}
wg.Wait()
}
@@ -261,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
}
if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)

View File

@@ -14,7 +14,6 @@ import (
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -172,11 +171,6 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig
@@ -197,7 +191,7 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
}
aCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(ba.Username, ba.Password.String(), ba.PasswordFile),
vmalertutil.WithBasicAuth(ba.Username, ba.UsernameFile, ba.Password.String(), ba.PasswordFile),
vmalertutil.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile),
vmalertutil.WithOAuth(oauth.ClientID, oauth.ClientSecret.String(), oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams),
vmalertutil.WithHeaders(strings.Join(authCfg.Headers, "^^")),

View File

@@ -105,7 +105,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
}
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
logger.Errorf("failed to init notifier for %q: %s", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
}
@@ -274,7 +274,7 @@ func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targ
for addr, metadata := range targetMts {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
logger.Errorf("failed to init %s notifier with addr %q: %s", key, addr, err)
continue
}
updatedTargets = append(updatedTargets, Target{

View File

@@ -212,18 +212,16 @@ consul_sd_configs:
const workers = 500
const iterations = 10
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
for range iterations {
rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers()
}
}(i)
})
}
wg.Wait()
}

View File

@@ -11,8 +11,8 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
@@ -36,6 +36,7 @@ var (
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -notifier.url. "+
"Multiple headers must be delimited by '^^': -notifier.headers='header1:value1^^header2:value2,header3:value3'")
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
basicAuthUsernameFile = flagutil.NewArrayString("notifier.basicAuth.usernameFile", "Optional path to basic auth username file for -notifier.url")
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
basicAuthPasswordFile = flagutil.NewArrayString("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url")
@@ -193,6 +194,7 @@ func InitSecretFlags() {
if !*showNotifierURL {
flagutil.RegisterSecretFlag("notifier.url")
}
flagutil.RegisterSecretFlag("notifier.headers")
}
func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
@@ -213,6 +215,7 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
},
BasicAuth: &promauth.BasicAuthConfig{
Username: basicAuthUsername.GetOptionalArg(i),
UsernameFile: basicAuthUsernameFile.GetOptionalArg(i),
Password: promauth.NewSecret(basicAuthPassword.GetOptionalArg(i)),
PasswordFile: basicAuthPasswordFile.GetOptionalArg(i),
},
@@ -229,6 +232,9 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)},
}
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil {
@@ -266,7 +272,7 @@ func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil {
return nil
}
var targets = make(map[TargetType][]Target)
targets := make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil {
cw.targetsMu.RLock()
@@ -287,7 +293,7 @@ func GetTargets() map[TargetType][]Target {
}
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
@@ -300,17 +306,18 @@ func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string
lblss = append(lblss, lbls)
}
errGr := new(vmalertutil.ErrGroup)
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
}
})
}
wg.Wait()
return errGr
return errCh
}

View File

@@ -55,9 +55,9 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole
}()
f := func(path, addr string, bh bool) {
f := func(path string, addr []string, bh bool) {
*configPath = path
*addrs = flagutil.ArrayString{addr}
*addrs = flagutil.ArrayString(addr)
*blackHole = bh
if err := Init(nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead")
@@ -65,9 +65,12 @@ func TestInitNegative(t *testing.T) {
}
// *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", "", true)
f("", "127.0.0.1", true)
f("/dummy/path", []string{"127.0.0.1"}, false)
f("/dummy/path", []string{}, true)
f("", []string{"127.0.0.1"}, true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
}
func TestBlackHole(t *testing.T) {
@@ -202,7 +205,9 @@ alert_relabel_configs:
},
}
errG := Send(context.Background(), firingAlerts, nil)
if errG.Err() != nil {
t.Fatalf("unexpected error when sending alerts: %s", err)
for err := range errG {
if err != nil {
t.Errorf("unexpected error when sending alerts: %s", err)
}
}
}

View File

@@ -14,7 +14,7 @@ type Notifier interface {
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent.
Addr() string
// LastError returns error, that occured during last attempt to send data
// LastError returns error, that occurred during last attempt to send data
LastError() string
// Close is a destructor for the Notifier
Close()

View File

@@ -28,6 +28,7 @@ var (
"Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
basicAuthUsernameFile = flag.String("remoteRead.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteRead.url")
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
basicAuthPasswordFile = flag.String("remoteRead.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteRead.url")
@@ -58,6 +59,7 @@ func InitSecretFlags() {
if !*showRemoteReadURL {
flagutil.RegisterSecretFlag("remoteRead.url")
}
flagutil.RegisterSecretFlag("remoteRead.headers")
}
// Init creates a Querier from provided flag values.
@@ -80,7 +82,7 @@ func Init() (datasource.QuerierBuilder, error) {
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
}
authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers))

View File

@@ -11,16 +11,23 @@ import (
"path"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/cespare/xxhash/v2"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -53,6 +60,11 @@ type Client struct {
wg sync.WaitGroup
doneCh chan struct{}
// Whether to encode the write request with VictoriaMetrics remote write protocol.
// It is set to true by default, and will be switched to false if the client
// receives specific errors indicating that the remote storage doesn't support VictoriaMetrics remote write protocol.
isVMRemoteWrite atomic.Bool
}
// Config is config for remote write client.
@@ -112,9 +124,12 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
doneCh: make(chan struct{}),
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
}
c.isVMRemoteWrite.Store(true)
for i := 0; i < cc; i++ {
c.run(ctx)
c.wg.Go(func() {
c.run(ctx, i)
})
}
return c, nil
}
@@ -156,8 +171,7 @@ func (c *Client) Close() error {
return nil
}
func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
func (c *Client) run(ctx context.Context, id int) {
wr := &prompb.WriteRequest{}
shutdown := func() {
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
@@ -174,40 +188,72 @@ func (c *Client) run(ctx context.Context) {
cancel()
}
c.wg.Go(func() {
defer ticker.Stop()
for {
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id)))
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64)))
timer := time.NewTimer(time.Duration(randJitter))
addJitter:
for {
select {
case <-c.doneCh:
timer.Stop()
shutdown()
return
case <-ctx.Done():
timer.Stop()
shutdown()
return
case <-timer.C:
break addJitter
}
}
ticker := time.NewTicker(c.flushInterval)
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
default:
}
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
}
}
}
})
}
}
var (
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time.
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`)
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
remoteWriteQueueSize = metrics.NewHistogram(`vmalert_remotewrite_queue_size`)
_ = metrics.NewGauge(`vmalert_remotewrite_queue_capacity`, func() float64 {
return float64(*maxQueueSize)
})
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
return float64(*concurrency)
@@ -221,34 +267,45 @@ func GetDroppedRows() int { return int(droppedRows.Get()) }
// it to remote-write endpoint. Flush performs limited amount of retries
// if request fails.
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
remoteWriteQueueSize.Update(float64(len(c.input)))
if len(wr.Timeseries) < 1 {
return
}
defer wr.Reset()
defer bufferFlushDuration.UpdateDuration(time.Now())
data := wr.MarshalProtobuf(nil)
b := snappy.Encode(nil, data)
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
if retryInterval > maxRetryInterval {
retryInterval = maxRetryInterval
bb := writeRequestBufPool.Get()
bb.B = wr.MarshalProtobuf(bb.B[:0])
zb := compressBufPool.Get()
defer compressBufPool.Put(zb)
if c.isVMRemoteWrite.Load() {
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, 0)
} else {
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
}
writeRequestBufPool.Put(bb)
maxRetryInterval := *retryMaxTime
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval)
timeStart := time.Now()
defer func() {
sendDuration.Add(time.Since(timeStart).Seconds())
}()
attempts := 0
L:
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
for {
err := c.send(ctx, zb.B)
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
// Something in the middle between client and destination might be closing
// the connection. So we make one more attempt in the hope that the request will succeed.
err = c.send(ctx, b)
err = c.send(ctx, zb.B)
}
if err == nil {
sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(b))
sentBytes.Add(len(zb.B))
flushedRows.Update(float64(len(wr.Timeseries)))
flushedBytes.Update(float64(len(zb.B)))
return
}
@@ -274,13 +331,13 @@ L:
break
}
if retryInterval > timeLeftForRetries {
retryInterval = timeLeftForRetries
if bt.CurrentDelay() > timeLeftForRetries {
bt.SetDelay(timeLeftForRetries)
}
// sleeping to prevent remote db hammering
time.Sleep(retryInterval)
retryInterval *= 2
bt.Wait(ctx.Done())
attempts++
}
rwErrors.Inc()
@@ -300,12 +357,16 @@ func (c *Client) send(ctx context.Context, data []byte) error {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
// RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("User-Agent", "vmalert")
req.Header.Set("Content-Type", "application/x-protobuf")
// Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if encoding.IsZstd(data) {
req.Header.Set("Content-Encoding", "zstd")
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
} else {
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
}
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
@@ -334,6 +395,29 @@ func (c *Client) send(ctx context.Context, data []byte) error {
// respond with HTTP 2xx status code when write is successful.
return nil
case 4:
// - Remote Write v1 specification implicitly expects a `400 Bad Request` when the encoding is not supported.
// - Remote Write v2 specification explicitly specifies a `415 Unsupported Media Type` for unsupported encodings.
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
if resp.StatusCode == http.StatusUnsupportedMediaType || resp.StatusCode == http.StatusBadRequest {
if encoding.IsZstd(data) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
zstdBlockLen := len(data)
data, err = repackBlockFromZstdToSnappy(data)
if err == nil {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
c.isVMRemoteWrite.Store(false)
return c.send(ctx, data)
}
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
zstdBlockLen, err)
}
}
if resp.StatusCode != http.StatusTooManyRequests {
// MUST NOT retry write requests on HTTP 4xx responses other than 429
return &nonRetriableError{
@@ -354,3 +438,19 @@ type nonRetriableError struct {
// Error returns the message of the wrapped error, which makes
// *nonRetriableError satisfy the error interface.
func (e *nonRetriableError) Error() string {
	msg := e.err.Error()
	return msg
}
var (
// writeRequestBufPool holds reusable buffers for the marshaled (uncompressed) write request.
writeRequestBufPool bytesutil.ByteBufferPool
// compressBufPool holds reusable buffers for the compressed request body sent to the remote storage.
compressBufPool bytesutil.ByteBufferPool
)
// repackBlockFromZstdToSnappy converts a zstd-compressed block into a
// snappy-compressed one: the input is decompressed first and the plain
// payload is then re-encoded with snappy.
//
// It returns a nil slice and the decompression error if zstdBlock
// cannot be decompressed.
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
	// Pre-size the scratch buffer to twice the compressed length.
	scratch := make([]byte, 0, 2*len(zstdBlock))
	raw, err := encoding.DecompressZSTD(scratch, zstdBlock)
	if err != nil {
		return nil, err
	}
	snappyBlock := snappy.Encode(nil, raw)
	return snappyBlock, nil
}

View File

@@ -12,8 +12,7 @@ import (
"testing"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
@@ -44,7 +43,7 @@ func TestClient_Push(t *testing.T) {
r := rand.New(rand.NewSource(1))
const rowsN = int(1e4)
for i := 0; i < rowsN; i++ {
for range rowsN {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: r.Float64(),
@@ -102,8 +101,11 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
}
// push time series to the client.
for i := 0; i < pushCnt; i++ {
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
for range pushCnt {
if err = rwClient.Push(prompb.TimeSeries{
Labels: []prompb.Label{{Name: "__name__", Value: "m"}},
Samples: []prompb.Sample{{Value: 1, Timestamp: 1000}},
}); err != nil {
t.Fatalf("cannot time series to the client: %s", err)
}
}
@@ -156,8 +158,8 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
}
h := r.Header.Get("Content-Encoding")
if h != "snappy" {
rw.err(w, fmt.Errorf("header read error: Content-Encoding is not snappy (%q)", h))
if h != "zstd" {
rw.err(w, fmt.Errorf("header read error: Content-Encoding is not zstd (%q)", h))
}
h = r.Header.Get("Content-Type")
@@ -165,9 +167,9 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
rw.err(w, fmt.Errorf("header read error: Content-Type is not x-protobuf (%q)", h))
}
h = r.Header.Get("X-Prometheus-Remote-Write-Version")
if h != "0.1.0" {
rw.err(w, fmt.Errorf("header read error: X-Prometheus-Remote-Write-Version is not 0.1.0 (%q)", h))
h = r.Header.Get("X-VictoriaMetrics-Remote-Write-Version")
if h != "1" {
rw.err(w, fmt.Errorf("header read error: X-VictoriaMetrics-Remote-Write-Version is not 1 (%q)", h))
}
data, err := io.ReadAll(r.Body)
@@ -177,7 +179,7 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
}
defer func() { _ = r.Body.Close() }()
b, err := snappy.Decode(nil, data)
b, err := zstd.Decompress(nil, data)
if err != nil {
rw.err(w, fmt.Errorf("decode err: %w", err))
return

View File

@@ -9,8 +9,7 @@ import (
"strings"
"sync"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
@@ -64,19 +63,17 @@ func (c *DebugClient) Close() error {
}
func (c *DebugClient) send(data []byte) error {
b := snappy.Encode(nil, data)
b := zstd.CompressLevel(nil, data, 0)
r := bytes.NewReader(b)
req, err := http.NewRequest(http.MethodPost, c.addr, r)
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
// RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Encoding", "zstd")
req.Header.Set("Content-Type", "application/x-protobuf")
// Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
if !*disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")

View File

@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
const rowsN = 100
var sent int
for i := 0; i < rowsN; i++ {
for i := range rowsN {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: float64(i),

View File

@@ -13,8 +13,8 @@ import (
)
var (
addr = flag.String("remoteWrite.url", "", "Optional URL to VictoriaMetrics or vminsert where to persist alerts state "+
"and recording rules results in form of timeseries. "+
addr = flag.String("remoteWrite.url", "", "Optional URL to persist alerts state and recording rules results in form of timeseries. "+
"It must support either VictoriaMetrics remote write protocol or Prometheus remote_write protocol. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, "+
"then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.")
@@ -26,6 +26,7 @@ var (
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
basicAuthUsernameFile = flag.String("remoteWrite.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteWrite.url")
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
basicAuthPasswordFile = flag.String("remoteWrite.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteWrite.url")
@@ -61,6 +62,7 @@ func InitSecretFlags() {
if !*showRemoteWriteURL {
flagutil.RegisterSecretFlag("remoteWrite.url")
}
flagutil.RegisterSecretFlag("remoteWrite.headers")
}
// Init creates Client object from given flags.
@@ -83,7 +85,7 @@ func Init(ctx context.Context) (*Client, error) {
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
}
authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers))

View File

@@ -312,9 +312,11 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
// do not add label with empty value to the result, as it has no meaning:
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
if v == "" {
delete(ls.processed, k)
return
}
ls.processed[k] = v
@@ -818,7 +820,9 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
res, _, err := q.Query(ctx, expr, ts)
// query ALERTS_FOR_STATE at `ts-1s` instead of `ts` to avoid retrieving data written in the current run,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
}

View File

@@ -0,0 +1,106 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
//
// The test executes the same alerting rule twice, two seconds apart, and checks
// that both the alert's ActiveAt field and the `{{ $activeAt }}` value rendered
// into the annotation keep the timestamp of the first execution.
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
// templated label; expected to render to the plain string "static_value"
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Add a single metric to the fake querier, so the rule produces exactly one alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
// Only the second-resolution prefix is compared, so sub-second digits and the zone suffix are ignored.
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
// NOTE(review): this checks the alert captured after the first execution, not secondAlert.
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -10,7 +10,6 @@ import (
"strings"
"sync"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/metrics"
@@ -1364,6 +1363,7 @@ func TestAlertingRule_ToLabels(t *testing.T) {
{Name: "instance", Value: "0.0.0.0:8800"},
{Name: "group", Value: "vmalert"},
{Name: "alertname", Value: "ConfigurationReloadFailure"},
{Name: "pod", Value: "vmalert-0"},
},
Values: []float64{1},
Timestamps: []int64{time.Now().UnixNano()},
@@ -1375,6 +1375,7 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"pod": "", // this should remove the pod label from query result
},
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError",
@@ -1386,6 +1387,7 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
"pod": "vmalert-0",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
}
@@ -1479,95 +1481,3 @@ func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
}
}
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -6,7 +6,9 @@ import (
"flag"
"fmt"
"hash/fnv"
"maps"
"net/url"
"path"
"sync"
"time"
@@ -18,6 +20,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
@@ -29,8 +32,8 @@ var (
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
@@ -40,6 +43,9 @@ var (
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
ruleStripFilePath = flag.Bool("rule.stripFilePath", false, "Whether to strip rule file paths in logs and all API responses, including /metrics. "+
"For example, file path '/path/to/tenant_id/rules.yml' will be stripped to 'groupHashID/rules.yml'. "+
"This flag may be useful for hiding sensitive information in file paths, such as S3 bucket details.")
)
// Group is an entity for grouping rules
@@ -96,9 +102,7 @@ type groupMetrics struct {
// set2 has priority over set1.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
r := map[string]string{}
for k, v := range set1 {
r[k] = v
}
maps.Copy(r, set1)
for k, v := range set2 {
if prevV, ok := r[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
@@ -147,6 +151,12 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
g.EvalDelay = &cfg.EvalDelay.D
}
g.id = g.CreateID()
// strip the file path from group.File after the group ID has been generated when ruleStripFilePath is set,
// so it won't be exposed in logs and api responses
if *ruleStripFilePath {
_, filename := path.Split(g.File)
g.File = fmt.Sprintf("%d/%s", g.id, filename)
}
for _, h := range cfg.Headers {
g.Headers[h.Key] = h.Value
}
@@ -374,15 +384,17 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.infof("started")
eval := func(ctx context.Context, ts time.Time) {
eval := func(ctx context.Context, ts time.Time) time.Time {
g.metrics.iterationTotal.Inc()
start := time.Now()
if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start
return
g.mu.Unlock()
return ts
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -395,7 +407,10 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
}
}
g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start
g.mu.Unlock()
return ts
}
evalCtx, cancel := context.WithCancel(ctx)
@@ -404,15 +419,18 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock()
defer g.evalCancel()
eval(evalCtx, evalTS)
// start the interval ticker before the first evaluation,
// so that the evaluation timestamps of groups with the `eval_offset` option are also aligned,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10773
t := time.NewTicker(g.Interval)
defer t.Stop()
realEvalTS := eval(evalCtx, evalTS)
// restore the rules state after the first evaluation
// so only active alerts can be restored.
if rr != nil {
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
}
@@ -483,8 +501,15 @@ func (g *Group) UpdateWith(newGroup *Group) {
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
if g.EvalOffset != nil {
offset := *g.EvalOffset
// adjust the offset for negative evalOffset, the rule is:
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`.
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
if offset < 0 {
offset += g.Interval
}
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset)
if currentOffsetPoint.Before(ts) {
// wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts)
@@ -493,11 +518,8 @@ func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Dura
}
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
interval := g.Interval
if interval > maxDelay {
// artificially limit interval, so groups with big intervals could start sooner.
interval = maxDelay
}
// artificially limit interval, so groups with big intervals could start sooner.
interval := min(g.Interval, maxDelay)
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
@@ -755,6 +777,7 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
}
var errG vmalertutil.ErrGroup
if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error
@@ -766,20 +789,26 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr
}
if err := pushToRW(tss); err != nil {
return err
errG.Add(err)
}
}
ar, ok := r.(*AlertingRule)
if !ok {
return nil
return errG.Err()
}
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 {
return nil
return errG.Err()
}
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
return errGr.Err()
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
for err := range notifierErr {
if err != nil {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
}
}
return errG.Err()
}

View File

@@ -405,7 +405,8 @@ func TestGroupStart(t *testing.T) {
var cur uint64
prev := g.metrics.iterationTotal.Get()
for i := 0; ; i++ {
i := 0
for {
if i > 40 {
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
}
@@ -414,6 +415,7 @@ func TestGroupStart(t *testing.T) {
return
}
time.Sleep(interval)
i++
}
}
@@ -604,6 +606,15 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
offset = -2 * time.Minute
g.EvalOffset = &offset
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
@@ -731,3 +742,64 @@ func parseTime(t *testing.T, s string) time.Time {
}
return tt
}
// TestRuleStripFilePath verifies that NewGroup keeps the configured file path
// on the group and on its rules by default, and that it replaces the path with
// "<group ID>/rules.yaml" once the ruleStripFilePath flag is enabled.
func TestRuleStripFilePath(t *testing.T) {
	const filePath = "/var/local/test/rules.yaml"

	configG := config.Group{
		Name:        "group",
		File:        filePath,
		Type:        config.NewRawType("prometheus"),
		Concurrency: 1,
		Rules: []config.Rule{
			{
				ID:    0,
				Alert: "alert",
			},
			{
				ID:     1,
				Record: "record",
			},
		},
	}
	qb := &datasource.FakeQuerier{}

	// flag disabled: paths must be left as configured
	g := NewGroup(configG, qb, 1*time.Minute, nil)
	gID := g.id
	if g.File != filePath {
		t.Fatalf("expected file path to be unchanged; got %q instead", g.File)
	}
	for _, r := range g.Rules {
		if ar, ok := r.(*AlertingRule); ok {
			if ar.File != filePath {
				t.Fatalf("expected rule file path to be unchanged; got %q instead", ar.File)
			}
		}
		if rr, ok := r.(*RecordingRule); ok {
			if rr.File != filePath {
				t.Fatalf("expected rule file path to be unchanged; got %q instead", rr.File)
			}
		}
	}

	// flag enabled: restore the previous value when the test finishes,
	// since the flag is package-level state shared with other tests
	oldRuleStripFilePath := *ruleStripFilePath
	*ruleStripFilePath = true
	defer func() {
		*ruleStripFilePath = oldRuleStripFilePath
	}()

	strippedPath := fmt.Sprintf("%d/rules.yaml", gID)
	g = NewGroup(configG, qb, 1*time.Minute, nil)
	if g.File != strippedPath {
		t.Fatalf("expected file path to be stripped to %q; got %q instead", strippedPath, g.File)
	}
	for _, r := range g.Rules {
		if ar, ok := r.(*AlertingRule); ok {
			if ar.File != strippedPath {
				t.Fatalf("expected rule file path to be stripped to %q; got %q instead", strippedPath, ar.File)
			}
		}
		if rr, ok := r.(*RecordingRule); ok {
			if rr.File != strippedPath {
				t.Fatalf("expected rule file path to be stripped to %q; got %q instead", strippedPath, rr.File)
			}
		}
	}
}

View File

@@ -293,9 +293,11 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
}
// add extra labels configured by user
for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
// do not add label with empty value to the result, as it has no meaning:
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
if rr.Labels[k] == "" {
m.DelLabel(k)
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k)

View File

@@ -163,11 +163,13 @@ func TestRecordingRule_Exec(t *testing.T) {
f(&RecordingRule{
Name: "job:foo",
Labels: map[string]string{
"source": "test",
"source": "test",
"empty_label": "", // this should be dropped
"pod": "", // this should remove the pod label from query result
},
}, [][]datasource.Metric{{
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo", "pod", "vmalert-0"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin", "pod", "vmalert-1"),
metricWithValueAndLabels(t, 1, "__name__", "baz", "job", "baz", "source", "test"),
}}, [][]prompb.TimeSeries{{
newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompb.Label{

View File

@@ -121,7 +121,7 @@ func (s *ruleState) add(e StateEntry) {
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error
var tss []prompb.TimeSeries
for i := 0; i < replayRuleRetryAttempts; i++ {
for i := range replayRuleRetryAttempts {
tss, err = r.execRange(context.Background(), start, end)
if err == nil {
break

View File

@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
}
var last time.Time
for i := 0; i < stateEntriesN*2; i++ {
for range stateEntriesN * 2 {
last = time.Now()
r.state.add(StateEntry{At: last})
}
@@ -65,17 +65,15 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50
const iterations = 100
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
var wg sync.WaitGroup
for range workers {
wg.Go(func() {
for range iterations {
r.state.add(StateEntry{At: time.Now()})
r.state.getAll()
r.state.getLast()
}
}()
})
}
wg.Wait()
}

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule:
br, ok := b.(*AlertingRule)
if !ok {
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
}
return compareAlertingRules(t, v, br)
case *RecordingRule:
br, ok := b.(*RecordingRule)
if !ok {
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
}
return compareRecordingRules(t, v, br)
default:

View File

@@ -57,12 +57,8 @@ type ApiGroup struct {
EvalOffset float64 `json:"eval_offset,omitempty"`
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
EvalDelay float64 `json:"eval_delay,omitempty"`
// Unhealthy unhealthy rules count
Unhealthy int
// Healthy passing rules count
Healthy int
// NoMatch not matching rules count
NoMatch int
// States represents counts per each rule state
States map[string]int `json:"states"`
}
// APILink returns a link to the group's JSON representation.
@@ -134,6 +130,11 @@ type ApiRule struct {
Updates []StateEntry `json:"-"`
}
// IsNoMatch reports whether the rule is in the "nomatch" state:
// LastSamples is zero and LastSeriesFetched is set and equals zero.
func (r *ApiRule) IsNoMatch() bool {
	if r.LastSamples != 0 {
		return false
	}
	fetched := r.LastSeriesFetched
	return fetched != nil && *fetched == 0
}
// ApiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
@@ -235,8 +236,25 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
return aa
}
// ExtendState refines r.State for rules that have no alerts:
//   - "unhealthy" when Health differs from "ok";
//   - "nomatch" when the rule is healthy and IsNoMatch returns true;
//   - "ok" when none of the above applies and the state is still empty.
//
// Rules with at least one alert are left untouched.
func (r *ApiRule) ExtendState() {
	if len(r.Alerts) > 0 {
		return
	}
	switch {
	case r.Health != "ok":
		r.State = "unhealthy"
	case r.IsNoMatch():
		r.State = "nomatch"
	case r.State == "":
		r.State = "ok"
	}
}
// ToAPI returns ApiGroup representation of g
func (g *Group) ToAPI() *ApiGroup {
if g == nil {
return &ApiGroup{}
}
g.mu.RLock()
defer g.mu.RUnlock()
ag := ApiGroup{
@@ -252,6 +270,7 @@ func (g *Group) ToAPI() *ApiGroup {
Headers: headersToStrings(g.Headers),
NotifierHeaders: headersToStrings(g.NotifierHeaders),
Labels: g.Labels,
States: make(map[string]int),
}
if g.EvalOffset != nil {
ag.EvalOffset = g.EvalOffset.Seconds()
@@ -259,9 +278,10 @@ func (g *Group) ToAPI() *ApiGroup {
if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds()
}
ag.Rules = make([]ApiRule, 0)
ag.Rules = make([]ApiRule, 0, len(g.Rules))
for _, r := range g.Rules {
ag.Rules = append(ag.Rules, r.ToAPI())
ar := r.ToAPI()
ag.Rules = append(ag.Rules, ar)
}
return &ag
}

View File

@@ -11,7 +11,7 @@
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
</symbol>
<symbol id="filter" viewBox="-10 -10 320 310">
<symbol id="state" viewBox="-10 -10 320 310">
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
</symbol>

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
});
}
function groupFilter(key) {
function groupForState(key) {
if (key) {
location.href = `?filter=${key}`;
location.href = `?state=${key}`;
} else {
window.location = window.location.pathname;
}

View File

@@ -402,6 +402,20 @@ func templateFuncs() textTpl.FuncMap {
return t, nil
},
// formatTime formats the given Unix timestamp with the provided layout.
// For example: {{ now | formatTime "2006-01-02T15:04:05Z07:00" }}
"formatTime": func(layout string, i any) (string, error) {
v, err := toFloat64(i)
if err != nil {
return "", fmt.Errorf("formatTime: %w", err)
}
if math.IsNaN(v) || math.IsInf(v, 0) {
return "", fmt.Errorf("formatTime: cannot convert %v to time", v)
}
t := timeFromUnixTimestamp(v).Time().UTC()
return t.Format(layout), nil
},
/* URLs */
// externalURL returns value of `external.url` flag

View File

@@ -6,6 +6,7 @@ import (
"strings"
"testing"
textTpl "text/template"
"time"
)
func TestTemplateFuncs_StringConversion(t *testing.T) {
@@ -103,6 +104,26 @@ func TestTemplateFuncs_Formatting(t *testing.T) {
f("humanizeTimestamp", 1679055557, "2023-03-17 12:19:17 +0000 UTC")
}
// TestTemplateFuncs_FormatTime checks that the "formatTime" template function
// renders the same Unix timestamp, passed as float64, int64 and int,
// according to the requested layout.
func TestTemplateFuncs_FormatTime(t *testing.T) {
	formatTime := templateFuncs()["formatTime"].(func(layout string, i any) (string, error))

	check := func(layout string, input any, expected string) {
		t.Helper()
		got, err := formatTime(layout, input)
		if err != nil {
			t.Fatalf("unexpected error for formatTime(%q, %v): %s", layout, input, err)
		}
		if got == expected {
			return
		}
		t.Fatalf("unexpected result for formatTime(%q, %v); got\n%s\nwant\n%s", layout, input, got, expected)
	}

	const unixTS = 1679055557
	check(time.RFC3339, float64(unixTS), "2023-03-17T12:19:17Z")
	check("2006-01-02T15:04:05", int64(unixTS), "2023-03-17T12:19:17")
	check(time.RFC822, int(unixTS), "17 Mar 23 12:19 UTC")
}
func mkTemplate(current, replacement any) textTemplate {
tmpl := textTemplate{}
if current != nil {

View File

@@ -20,11 +20,12 @@ func AuthConfig(filterOptions ...AuthConfigOptions) (*promauth.Config, error) {
}
// WithBasicAuth returns AuthConfigOptions and initialized promauth.BasicAuthConfig based on given params
func WithBasicAuth(username, password, passwordFile string) AuthConfigOptions {
func WithBasicAuth(username, usernameFile, password, passwordFile string) AuthConfigOptions {
return func(config *promauth.HTTPClientConfig) {
if username != "" || password != "" || passwordFile != "" {
if username != "" || usernameFile != "" || password != "" || passwordFile != "" {
config.BasicAuth = &promauth.BasicAuthConfig{
Username: username,
UsernameFile: usernameFile,
Password: promauth.NewSecret(password),
PasswordFile: passwordFile,
}

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return ""
}
var b strings.Builder
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
for i, err := range eg.errs {
b.WriteString(err.Error())
if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
}
f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
}
// TestErrGroupConcurrent supposed to test concurrent
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
const writersN = 4
payload := make(chan error, writersN)
for i := 0; i < writersN; i++ {
for range writersN {
go func() {
for err := range payload {
eg.Add(err)
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
}
const iterations = 500
for i := 0; i < iterations; i++ {
for i := range iterations {
payload <- fmt.Errorf("error %d", i)
if i%10 == 0 {
_ = eg.Err()

View File

@@ -1,9 +1,11 @@
package main
import (
"cmp"
"embed"
"encoding/json"
"fmt"
"math"
"net/http"
"slices"
"strconv"
@@ -50,6 +52,13 @@ var (
"alert": rule.TypeAlerting,
"record": rule.TypeRecording,
}
// The "recovering", "noData", "normal", "error" states are used by Grafana.
// Ignore "recovering" since it is not currently acknowledged by vmalert,
// treat "noData" as an alias for "nomatch",
// treat "normal" as an alias for "inactive",
// treat "error" as an alias for "unhealthy"
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy", "recovering", "noData", "normal", "error"}
)
type requestHandler struct {
@@ -63,6 +72,14 @@ var (
staticServer = http.StripPrefix("/vmalert", staticHandler)
)
// marshalJson encodes v as JSON.
// kind names the entity being encoded and is used only in the error message.
// On failure it returns an error carrying http.StatusInternalServerError.
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
	data, err := json.Marshal(v)
	if err != nil {
		// wrap with %w (as the other errors in this file do), so the cause
		// stays in the error chain instead of being flattened to text
		return nil, errResponse(fmt.Errorf("failed to marshal %s: %w", kind, err), http.StatusInternalServerError)
	}
	return data, nil
}
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
staticServer.ServeHTTP(w, r)
@@ -94,40 +111,32 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "%s", err)
return true
}
WriteRuleDetails(w, r, rule)
WriteRule(w, r, rule)
return true
case "/vmalert/groups":
// currently used by the old vmalert UI and Grafana Alerts
case "/vmalert/groups", "/rules":
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data := rh.groups(rf)
WriteListGroups(w, r, data, rf.filter)
// only support filtering by a single state
state := ""
if len(rf.states) > 0 {
state = rf.states[0]
rf.states = rf.states[:1]
}
lr := rh.groups(rf)
WriteListGroups(w, r, lr.Data.Groups, state)
return true
case "/vmalert/notifiers":
WriteListTargets(w, r, notifier.GetTargets())
return true
// special cases for Grafana requests,
// served without `vmalert` prefix:
case "/rules":
// Grafana makes an extra request to `/rules`
// handler in addition to `/api/v1/rules` calls in alerts UI
var data []*rule.ApiGroup
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data = rh.groups(rf)
WriteListGroups(w, r, data, rf.filter)
return true
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
data, err := rh.listNotifiers()
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -135,15 +144,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/vmalert/api/v1/rules", "/api/v1/rules":
// path used by Grafana for ng alerting
var data []byte
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
data, err = rh.listGroups(rf)
data, err := rh.listGroups(rf)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -152,14 +160,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
// path used by Grafana for ng alerting
rf, err := newRulesFilter(r)
gf, err := newGroupsFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
data, err := rh.listAlerts(rf)
data, err := rh.listAlerts(gf)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -168,12 +176,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/alert", "/api/v1/alert":
alert, err := rh.getAlert(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
data, err := json.Marshal(alert)
data, err := marshalJson(alert, "alert")
if err != nil {
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -182,16 +190,16 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/rule", "/api/v1/rule":
apiRule, err := rh.getRule(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
rwu := rule.ApiRuleWithUpdates{
ApiRule: apiRule,
StateUpdates: apiRule.Updates,
}
data, err := json.Marshal(rwu)
data, err := marshalJson(rwu, "rule")
if err != nil {
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -200,12 +208,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/group", "/api/v1/group":
group, err := rh.getGroup(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
errJson(w, r, err)
return true
}
data, err := json.Marshal(group)
data, err := marshalJson(group, "group")
if err != nil {
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -225,10 +233,10 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
}
}
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
}
obj, err := rh.m.groupAPI(groupID)
if err != nil {
@@ -237,14 +245,14 @@ func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
return obj, nil
}
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
}
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil {
@@ -253,14 +261,14 @@ func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
return obj, nil
}
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
}
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
}
a, err := rh.m.alertAPI(groupID, alertID)
if err != nil {
@@ -270,28 +278,76 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
}
type listGroupsResponse struct {
Status string `json:"status"`
Data struct {
Status string `json:"status"`
Page int `json:"page,omitempty"`
TotalPages int `json:"total_pages,omitempty"`
TotalGroups int `json:"total_groups,omitempty"`
TotalRules int `json:"total_rules,omitempty"`
Data struct {
Groups []*rule.ApiGroup `json:"groups"`
} `json:"data"`
}
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
type rulesFilter struct {
files []string
groupNames []string
ruleNames []string
ruleType string
excludeAlerts bool
filter string
dsType config.Type
// groupsFilter holds the group-level filters parsed from an HTTP request.
// An empty field means the corresponding filter is not applied.
type groupsFilter struct {
// groupNames is populated from the `rule_group[]` request parameter.
groupNames []string
// files is populated from the `file[]` request parameter.
files []string
// dsType is populated from the `datasource_type` request parameter
// when it holds a supported value.
dsType config.Type
}
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
rf := &rulesFilter{}
query := r.URL.Query()
// newGroupsFilter builds a groupsFilter from the form values of r.
//
// It reads the `rule_group[]`, `file[]` and `datasource_type` parameters.
// A non-empty `datasource_type` that is not supported results in
// an error with http.StatusBadRequest.
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
	// the parse error is intentionally dropped; r.Form is read regardless
	_ = r.ParseForm()
	form := r.Form

	filter := &groupsFilter{
		groupNames: form["rule_group[]"],
		files:      form["file[]"],
	}

	dsType := form.Get("datasource_type")
	if len(dsType) == 0 {
		return filter, nil
	}
	if !config.SupportedType(dsType) {
		return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
	}
	filter.dsType = config.NewRawType(dsType)
	return filter, nil
}
ruleTypeParam := query.Get("type")
// matches reports whether group passes every filter configured in gf.
// A filter that is not set (empty list or empty datasource type name)
// accepts any group.
func (gf *groupsFilter) matches(group *rule.Group) bool {
	switch {
	case len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name):
		// group name is not in the requested list
		return false
	case len(gf.files) > 0 && !slices.Contains(gf.files, group.File):
		// group file is not in the requested list
		return false
	case len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String():
		// datasource type differs from the requested one
		return false
	}
	return true
}
// rulesFilter holds the group-level and rule-level filters, plus the
// pagination and search options, parsed from an HTTP request.
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
type rulesFilter struct {
// gf holds the group-level filters.
gf *groupsFilter
// ruleNames is populated from the `rule_name[]` request parameter.
ruleNames []string
// ruleType is the rule type resolved from the `type` request parameter.
ruleType string
// excludeAlerts is populated from the `exclude_alerts` request parameter.
excludeAlerts bool
// states lists the rule states to match; it is read from the `state`
// request parameter, falling back to `filter` when `state` is absent.
states []string
// maxGroups is populated from the `group_limit` request parameter.
maxGroups int
// pageNum is populated from the `page_num` request parameter.
pageNum int
// search is the lower-cased value of the `search` request parameter.
search string
// extendedStates is populated from the `extended_states` request parameter.
extendedStates bool
}
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
gf, err := newGroupsFilter(r)
if err != nil {
return nil, err
}
var rf rulesFilter
rf.gf = gf
vs := r.Form
ruleTypeParam := vs.Get("type")
if len(ruleTypeParam) > 0 {
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
rf.ruleType = ruleType
@@ -300,102 +356,155 @@ func newRulesFilter(r *http.Request) (*rulesFilter, error) {
}
}
dsType := query.Get("datasource_type")
if len(dsType) > 0 {
if config.SupportedType(dsType) {
rf.dsType = config.NewRawType(dsType)
} else {
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
}
states := vs["state"]
if len(states) == 0 {
states = vs["filter"]
}
filter := strings.ToLower(query.Get("filter"))
if len(filter) > 0 {
if filter == "nomatch" || filter == "unhealthy" {
rf.filter = filter
} else {
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
for _, s := range states {
values := strings.Split(s, ",")
for _, v := range values {
if len(v) == 0 {
continue
}
if !slices.Contains(ruleStates, v) {
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
}
// Replace grafana states with supported internal states
switch v {
case "noData":
v = "nomatch"
case "normal":
v = "inactive"
case "error":
v = "unhealthy"
}
rf.states = append(rf.states, v)
}
}
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
rf.files = append([]string{}, r.Form["file[]"]...)
return rf, nil
rf.extendedStates = httputil.GetBool(r, "extended_states")
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
rf.search = strings.ToLower(vs.Get("search"))
pageNum := vs.Get("page_num")
maxGroups := vs.Get("group_limit")
if pageNum != "" {
if maxGroups == "" {
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
}
v, err := strconv.Atoi(pageNum)
if err != nil || v <= 0 {
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
}
rf.pageNum = v
}
if maxGroups != "" {
v, err := strconv.Atoi(maxGroups)
if err != nil || v <= 0 {
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
}
rf.maxGroups = v
}
return &rf, nil
}
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
if rf.ruleType != "" && rf.ruleType != r.Type {
return false
}
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
return false
}
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
return false
if len(rf.states) == 0 {
return true
}
return true
return slices.Contains(rf.states, r.State)
}
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
groups := make([]*rule.ApiGroup, 0)
skipGroups := (rf.pageNum - 1) * rf.maxGroups
lr := &listGroupsResponse{
Status: "success",
}
lr.Data.Groups = make([]*rule.ApiGroup, 0)
if skipGroups >= len(rh.m.groups) {
return lr
}
// sort list of groups for deterministic output
groups := make([]*rule.Group, 0, len(rh.m.groups))
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
groups = append(groups, group)
}
slices.SortFunc(groups, func(a, b *rule.Group) int {
nameCmp := cmp.Compare(a.Name, b.Name)
if nameCmp != 0 {
return nameCmp
}
return cmp.Compare(a.File, b.File)
})
for _, group := range groups {
if !rf.gf.matches(group) {
continue
}
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
g := group.ToAPI()
// the returned list should always be non-nil
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
filteredRules := make([]rule.ApiRule, 0)
for _, rule := range g.Rules {
if rf.ruleType != "" && rf.ruleType != rule.Type {
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
continue
}
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
continue
if rf.extendedStates {
rule.ExtendState()
}
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
if !rf.matchesRule(&rule) {
continue
}
if rf.excludeAlerts {
rule.Alerts = nil
}
if rule.LastError != "" {
g.Unhealthy++
} else {
g.Healthy++
}
if isNoMatch(rule) {
g.NoMatch++
}
g.States[rule.State]++
filteredRules = append(filteredRules, rule)
}
g.Rules = filteredRules
groups = append(groups, g)
}
// sort list of groups for deterministic output
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
if a.Name != b.Name {
return strings.Compare(a.Name, b.Name)
if len(g.Rules) == 0 || len(filteredRules) > 0 {
if rf.maxGroups > 0 {
lr.TotalGroups++
lr.TotalRules += len(filteredRules)
}
if skipGroups > 0 {
skipGroups--
continue
}
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
g.Rules = filteredRules
lr.Data.Groups = append(lr.Data.Groups, g)
}
}
return strings.Compare(a.File, b.File)
})
return groups
}
if rf.maxGroups > 0 {
lr.Page = rf.pageNum
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
}
return lr
}
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
lr := listGroupsResponse{Status: "success"}
lr.Data.Groups = rh.groups(rf)
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
lr := rh.groups(rf)
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
}
if lr.Page > lr.TotalPages {
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
}
b, err := json.Marshal(lr)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
StatusCode: http.StatusInternalServerError,
}
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
}
return b, nil
}
@@ -434,14 +543,14 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
return gAlerts
}
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
if !gf.matches(group) {
continue
}
g := group.ToAPI()
@@ -460,10 +569,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
b, err := json.Marshal(lr)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
StatusCode: http.StatusInternalServerError,
}
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
}
return b, nil
}
@@ -475,7 +581,7 @@ type listNotifiersResponse struct {
} `json:"data"`
}
func (rh *requestHandler) listNotifiers() ([]byte, error) {
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
targets := notifier.GetTargets()
lr := listNotifiersResponse{Status: "success"}
@@ -497,10 +603,7 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
b, err := json.Marshal(lr)
if err != nil {
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
StatusCode: http.StatusInternalServerError,
}
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
}
return b, nil
}
@@ -511,3 +614,8 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
StatusCode: sc,
}
}
// errJson sets the Content-Type response header to application/json and then
// reports err via httpserver.Errorf using a JSON-shaped format string with
// "error" and "errorType" fields.
//
// NOTE(review): %q quotes with Go string syntax, which is not always valid JSON
// escaping (e.g. \x.. or \U........ sequences) — confirm error texts cannot
// contain such characters.
// NOTE(review): "errorType" is filled with the numeric err.StatusCode — confirm
// this is what API clients expect.
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
w.Header().Set("Content-Type", "application/json")
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
}

View File

@@ -9,9 +9,10 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
<div class="btn-toolbar mb-3" role="toolbar">
<div class="d-flex gap-2 justify-content-between w-100">
<div class="d-flex gap-2 align-items-center">
@@ -27,10 +28,10 @@
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
</svg>
</a>
{% if len(filters) > 0 %}
{% if len(states) > 0 %}
<span class="d-none d-md-inline-block">Filter by status:</span>
<svg class="d-md-none" width="20" height="20">
<use href="{%s prefix %}static/icons/icons.svg#filter">
<use href="{%s prefix %}static/icons/icons.svg#state">
</svg>
<div class="dropdown">
<button
@@ -45,10 +46,10 @@
</svg>
</button>
<ul class="dropdown-menu">
{% for key, title := range filters %}
{% for key, title := range states %}
{% if title != currentText %}
<li>
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
<span class="d-none d-md-inline-block">{%s title %}</span>
<svg class="d-md-none" width="22" height="22">
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
@@ -78,6 +79,8 @@
{% func Welcome(r *http.Request) %}
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
<p>
Version {%s buildinfo.Version %} <br>
API:<br>
{% for _, p := range apiLinks %}
{%code p, doc := p[0], p[1] %}
@@ -94,10 +97,10 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
{%code
prefix := vmalertutil.Prefix(r.URL.Path)
filters := map[string]string{
states := map[string]string{
"": "All",
"unhealthy": "Unhealthy",
"nomatch": "No Match",
@@ -107,14 +110,14 @@
"unhealthy": "unhealthy",
"nomatch": "nomatch",
}
currentText := filters[filter]
currentIcon := icons[filter]
currentText := states[state]
currentIcon := icons[state]
%}
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
{% if len(groups) > 0 %}
{% for _, g := range groups %}
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
<span class="d-flex justify-content-between">
<a
class="vm-group-search"
@@ -127,9 +130,9 @@
data-bs-target="#item-{%s g.ID %}"
>
<span class="d-flex gap-2">
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
</span>
</span>
</span>
@@ -186,7 +189,7 @@
<b>record:</b> {%s r.Name %}
{% endif %}
|
{%= seriesFetchedWarn(prefix, r) %}
{%= seriesFetchedWarn(prefix, &r) %}
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
</div>
<div class="col-12">
@@ -473,7 +476,7 @@
{% endfunc %}
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
{% func Rule(r *http.Request, rule rule.ApiRule) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -658,8 +661,8 @@
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
{% if isNoMatch(r) %}
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
{% if r.IsNoMatch() %}
<svg
data-bs-toggle="tooltip"
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
@@ -670,9 +673,3 @@
</svg>
{% endif %}
{% endfunc %}
{%code
func isNoMatch (r rule.ApiRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
%}

File diff suppressed because it is too large Load Diff

View File

@@ -210,7 +210,7 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/rules&filters", func(t *testing.T) {
t.Run("/api/v1/rules&states", func(t *testing.T) {
check := func(url string, statusCode, expGroups, expRules int) {
t.Helper()
lr := listGroupsResponse{}
@@ -252,9 +252,15 @@ func TestHandler(t *testing.T) {
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
check("/api/v1/rules?group_limit=1", 200, 1, 2)
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
check("/api/v1/rules?group_limit=2", 200, 2, 4)
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
})
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
// check if response returns active alerts by default

View File

@@ -13,6 +13,7 @@ import (
"net/url"
"os"
"regexp"
"slices"
"sort"
"strconv"
"strings"
@@ -28,6 +29,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -65,10 +67,11 @@ type AuthConfig struct {
type UserInfo struct {
Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
JWT *JWTConfig `yaml:"jwt,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
@@ -89,6 +92,8 @@ type UserInfo struct {
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
AccessLog *AccessLog `yaml:"access_log,omitempty"`
concurrencyLimitCh chan struct{}
concurrencyLimitReached *metrics.Counter
@@ -101,11 +106,40 @@ type UserInfo struct {
requestsDuration *metrics.Summary
}
// AccessLog represents configuration for access log settings.
//
// UserInfo.logRequest prints nothing when UserInfo.AccessLog is nil.
type AccessLog struct {
// Filters optionally limits which requests are logged; a nil value disables filtering.
Filters *AccessLogFilters `yaml:"filters"`
}
// AccessLogFilters represents the list of filters applied before printing access logs.
type AccessLogFilters struct {
// SkipStatusCodes is a list of HTTP status codes for which access logs will be skipped.
SkipStatusCodes []int `yaml:"skip_status_codes"`
}
// logRequest prints a single access_log line for the request r.
//
// Nothing is printed when access logging isn't configured for ui
// or when statusCode is listed in the configured skip_status_codes filter.
//
// userName is logged as-is in the username field; duration is logged in milliseconds.
func (ui *UserInfo) logRequest(r *http.Request, userName string, statusCode int, duration time.Duration) {
	al := ui.AccessLog
	if al == nil {
		return
	}
	// slices.Contains returns false for an empty list, so no separate length check is needed.
	if f := al.Filters; f != nil && slices.Contains(f.SkipStatusCodes, statusCode) {
		return
	}

	remoteAddr := httpserver.GetQuotedRemoteAddr(r)
	requestURI := httpserver.GetRequestURI(r)
	durationMs := duration.Milliseconds()
	logger.Infof("access_log request_host=%q request_uri=%q status_code=%d remote_addr=%s user_agent=%q referer=%q duration_ms=%d username=%q",
		r.Host, requestURI, statusCode, remoteAddr, r.UserAgent(), r.Referer(), durationMs, userName)
}
// HeadersConf represents config for request and response headers.
type HeadersConf struct {
RequestHeaders []*Header `yaml:"headers,omitempty"`
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
RequestHeaders []*Header `yaml:"headers,omitempty"`
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
hasAnyPlaceHolders bool
}
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
@@ -113,10 +147,8 @@ func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
default:
ui.concurrencyLimitReached.Inc()
// The per-user limit for the number of concurrent requests is reached.
// Wait until the currently executed requests are finished, so the current request could be executed.
// The number of concurrently executed requests for the given user equals the limit.
// Wait until some of the currently executed requests are finished, so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case ui.concurrencyLimitCh <- struct{}{}:
@@ -124,6 +156,8 @@ func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
}
@@ -150,12 +184,22 @@ func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix == nil {
return
}
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
if ui.URLPrefix != nil {
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
if ui.DefaultURL != nil {
bus := ui.DefaultURL.bus.Load()
bus.stopHealthChecks()
}
for i := range ui.URLMaps {
um := &ui.URLMaps[i]
if um.URLPrefix != nil {
bus := um.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request.
@@ -318,43 +362,68 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
}
type backendURLs struct {
healthChecksContext context.Context
healthChecksCancel func()
healthChecksWG sync.WaitGroup
bhc backendHealthCheck
bus []*backendURL
}
// backendHealthCheck owns the lifecycle of background health check goroutines:
// run starts them, stop cancels them and waits until all of them return.
type backendHealthCheck struct {
	// ctx is the context associated with the health checks.
	ctx context.Context

	// cancel is invoked by stop while mu is held.
	cancel func()

	// mu serializes run against stop and guards isStopped.
	mu        sync.Mutex
	isStopped bool

	// wg tracks the goroutines started via run.
	wg sync.WaitGroup
}

// run starts hc in a new goroutine tracked by bhc.
//
// It is a no-op if stop has been already called.
func (bhc *backendHealthCheck) run(hc func()) {
	bhc.mu.Lock()
	defer bhc.mu.Unlock()

	if !bhc.isStopped {
		bhc.wg.Go(hc)
	}
}

// stop cancels the health checks, prevents new ones from being started
// and blocks until every goroutine started by run has returned.
func (bhc *backendHealthCheck) stop() {
	// The lock is released before waiting, so goroutines being waited for
	// are never blocked by a concurrent run call holding mu.
	func() {
		bhc.mu.Lock()
		defer bhc.mu.Unlock()
		bhc.cancel()
		bhc.isStopped = true
	}()

	bhc.wg.Wait()
}
func newBackendURLs() *backendURLs {
ctx, cancel := context.WithCancel(context.Background())
return &backendURLs{
healthChecksContext: ctx,
healthChecksCancel: cancel,
bhc: backendHealthCheck{
ctx: ctx,
cancel: cancel,
},
}
}
func (bus *backendURLs) add(u *url.URL) {
bus.bus = append(bus.bus, &backendURL{
url: u,
healthCheckContext: bus.healthChecksContext,
healthCheckWG: &bus.healthChecksWG,
url: u,
bhc: &bus.bhc,
hasPlaceHolders: hasAnyPlaceholders(u),
})
}
func (bus *backendURLs) stopHealthChecks() {
bus.healthChecksCancel()
bus.healthChecksWG.Wait()
bus.bhc.stop()
}
type backendURL struct {
broken atomic.Bool
healthCheckContext context.Context
healthCheckWG *sync.WaitGroup
bhc *backendHealthCheck
concurrentRequests atomic.Int32
url *url.URL
hasPlaceHolders bool
}
func (bu *backendURL) isBroken() bool {
@@ -363,12 +432,10 @@ func (bu *backendURL) isBroken() bool {
func (bu *backendURL) setBroken() {
if bu.broken.CompareAndSwap(false, true) {
bu.healthCheckWG.Add(1)
go func() {
defer bu.healthCheckWG.Done()
bu.bhc.run(func() {
bu.runHealthCheck()
bu.broken.Store(false)
}()
})
}
}
@@ -387,11 +454,11 @@ func (bu *backendURL) runHealthCheck() {
case <-t.C:
// Verify network connectivity via TCP dial before marking backend healthy.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
ctx, cancel := context.WithTimeout(bu.bhc.ctx, time.Second)
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
cancel()
if err != nil {
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
if errors.Is(bu.bhc.ctx.Err(), context.Canceled) {
return
}
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
@@ -400,7 +467,7 @@ func (bu *backendURL) runHealthCheck() {
_ = c.Close()
return
case <-bu.healthCheckContext.Done():
case <-bu.bhc.ctx.Done():
return
}
}
@@ -543,6 +610,7 @@ func areEqualBackendURLs(a, b []*backendURL) bool {
}
// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken.
// If all backendURLs are broken, then returns the first backendURL.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
@@ -561,26 +629,27 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
return bu
}
}
return nil
// All backend urls are unavailable, so return the first one; this could help increase the success rate of the requests.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10837#issuecomment-4307050980.
bu.get()
return bu
}
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
// If all backendURLs are broken, then returns the first backendURL.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
firstBu := bus[0]
if len(bus) == 1 {
// Fast path - return the only backend url.
bu := bus[0]
if bu.isBroken() {
return nil
}
bu.get()
return bu
firstBu.get()
return firstBu
}
// Slow path - select other backend urls.
n := atomicCounter.Add(1) - 1
for i := uint32(0); i < uint32(len(bus)); i++ {
for i := range uint32(len(bus)) {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
if bu.isBroken() {
@@ -590,7 +659,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
atomicCounter.CompareAndSwap(n+1, idx+1)
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
// There is no need in the call bu.get(), because we already incremented bu.concurrentRequests above.
return bu
}
}
@@ -613,7 +682,10 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
}
buMin := bus[buMinIdx]
if buMin.isBroken() {
return nil
// If all backendURLs are broken, then returns the first backendURL.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10837#issuecomment-4307050980.
firstBu.get()
return firstBu
}
buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
@@ -733,11 +805,9 @@ func initAuthConfig() {
configTimestamp.Set(fasttime.UnixTimestamp())
stopCh = make(chan struct{})
authConfigWG.Add(1)
go func() {
defer authConfigWG.Done()
authConfigWG.Go(func() {
authConfigReloader(sighupCh)
}()
})
}
func stopAuthConfig() {
@@ -793,6 +863,9 @@ var (
// authUsers contains the currently loaded auth users
authUsers atomic.Pointer[map[string]*UserInfo]
// jwt authentication cache
jwtAuthCache atomic.Pointer[jwtCache]
authConfigWG sync.WaitGroup
stopCh chan struct{}
)
@@ -832,6 +905,16 @@ func reloadAuthConfigData(data []byte) (bool, error) {
return false, fmt.Errorf("failed to parse auth config: %w", err)
}
jui, oidcDP, err := parseJWTUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
}
oidcDP.startDiscovery()
jwtc := &jwtCache{
users: jui,
oidcDP: oidcDP,
}
m, err := parseAuthConfigUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
@@ -848,9 +931,15 @@ func reloadAuthConfigData(data []byte) (bool, error) {
}
metrics.RegisterSet(ac.ms)
jwtcPrev := jwtAuthCache.Load()
if jwtcPrev != nil {
jwtcPrev.oidcDP.stopDiscovery()
}
authConfig.Store(ac)
authConfigData.Store(&data)
authUsers.Store(&m)
jwtAuthCache.Store(jwtc)
return true, nil
}
@@ -875,12 +964,18 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
if ui.BearerToken != "" {
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
}
if ui.JWT != nil {
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
}
if ui.AuthToken != "" {
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
}
if ui.Name != "" {
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
}
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
return nil, err
}
if err := ui.initURLs(); err != nil {
return nil, err
}
@@ -921,16 +1016,27 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
}
for i := range uis {
ui := &uis[i]
// users with jwt tokens are parsed by parseJWTUsers function.
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
if ui.JWT != nil {
continue
}
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
if err != nil {
return nil, err
}
for _, at := range ats {
if uiOld := byAuthToken[at]; uiOld != nil {
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
}
}
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
return nil, err
}
if err := ui.initURLs(); err != nil {
return nil, err
}
@@ -1030,6 +1136,7 @@ func (ui *UserInfo) initURLs() error {
return err
}
}
for _, e := range ui.URLMaps {
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
@@ -1089,6 +1196,9 @@ func (ui *UserInfo) name() string {
h := xxhash.Sum64([]byte(ui.AuthToken))
return fmt.Sprintf("auth_token:hash:%016X", h)
}
if ui.JWT != nil {
return `jwt`
}
return ""
}

View File

@@ -4,8 +4,11 @@ import (
"bytes"
"fmt"
"net"
"net/http"
"net/url"
"strings"
"testing"
"time"
"gopkg.in/yaml.v2"
@@ -276,6 +279,50 @@ users:
url_prefix: http://foo.bar
metric_labels:
not-prometheus-compatible: value
`)
// placeholder in url_prefix
f(`
users:
- username: foo
password: bar
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
`)
// placeholder in a header
f(`
users:
- username: foo
password: bar
headers:
- 'X-Foo: {{a_placeholder}}'
url_prefix: 'http://ahost'
`)
// placeholder in url_prefix
f(`
users:
- username: foo
password: bar
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
`)
// placeholder in a header in url_map
f(`
users:
- username: foo
password: bar
url_map:
- src_paths: ["/select/.*"]
headers:
- 'X-Foo: {{a_placeholder}}'
url_prefix: 'http://ahost'
`)
// placeholder in url_prefix in url_map
f(`
users:
- username: foo
password: bar
url_map:
- src_paths: ["/select/.*"]
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
`)
}
@@ -378,7 +425,7 @@ users:
RetryStatusCodes: []int{500, 501},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
DropSrcPathPrefixParts: intp(1),
DropSrcPathPrefixParts: new(1),
DiscoverBackendIPs: &discoverBackendIPsTrue,
},
}, nil)
@@ -621,6 +668,47 @@ unauthorized_user:
},
},
})
// skip user info with jwt, it is parsed by parseJWTUsers
f(`
users:
- username: foo
password: bar
url_prefix: http://aaa:343/bbb
- jwt: {skip_verify: true}
url_prefix: http://aaa:343/bbb
`, map[string]*UserInfo{
getHTTPAuthBasicToken("foo", "bar"): {
Username: "foo",
Password: "bar",
URLPrefix: mustParseURL("http://aaa:343/bbb"),
},
}, nil)
// Multiple users with access logs enabled
f(`
users:
- username: foo
url_prefix: http://foo
access_log: {}
- username: bar
url_prefix: https://bar/x/
access_log:
filters:
skip_status_codes: [404]
`, map[string]*UserInfo{
getHTTPAuthBasicToken("foo", ""): {
Username: "foo",
URLPrefix: mustParseURL("http://foo"),
AccessLog: &AccessLog{},
},
getHTTPAuthBasicToken("bar", ""): {
Username: "bar",
URLPrefix: mustParseURL("https://bar/x/"),
AccessLog: &AccessLog{Filters: &AccessLogFilters{SkipStatusCodes: []int{404}}},
},
}, nil)
}
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
@@ -831,7 +919,7 @@ func TestBrokenBackend(t *testing.T) {
bus[1].setBroken()
// broken backend should never return while there are healthy backends
for i := 0; i < 1e3; i++ {
for range int(1e3) {
b := up.getBackendURL()
if b.isBroken() {
t.Fatalf("unexpected broken backend %q", b.url)
@@ -908,6 +996,68 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
}
func TestLogRequest(t *testing.T) {
ui := &UserInfo{AccessLog: &AccessLog{}}
testOutput := &bytes.Buffer{}
logger.SetOutputForTests(testOutput)
defer logger.ResetOutputForTest()
req, err := http.NewRequest("GET", "http://localhost:8080/select/0/prometheus", nil)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
f := func(user string, status int, duration time.Duration, expectedLog string) {
t.Helper()
testOutput.Reset()
ui.logRequest(req, user, status, duration)
got := testOutput.String()
if expectedLog == "" && got != "" {
t.Fatalf("expected empty log, got %q", got)
}
if !strings.Contains(got, expectedLog) {
t.Fatalf("output \n%q \nshould contain \n%q", testOutput.String(), expectedLog)
}
}
f("foo", 200, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=200 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
f("foo", 404, time.Second, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=1000 username="foo"`)
ui.AccessLog.Filters = &AccessLogFilters{SkipStatusCodes: []int{200}}
f("foo", 200, 10*time.Millisecond, ``)
f("foo", 404, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
}
// TestGetFirstAvailableBackend checks which backend is picked by getFirstAvailableBackendURL
// for the given set of broken flags, including the case when every backend is broken.
func TestGetFirstAvailableBackend(t *testing.T) {
	f := func(broken []bool, expectedIdx int) {
		t.Helper()

		bus := make([]*backendURL, 0, len(broken))
		for i, isBroken := range broken {
			bu := &backendURL{
				url: &url.URL{Host: fmt.Sprintf("server-%d", i)},
			}
			bu.broken.Store(isBroken)
			bus = append(bus, bu)
		}

		got := getFirstAvailableBackendURL(bus)
		if got == nil {
			t.Fatalf("unexpected nil backend")
		}
		if want := fmt.Sprintf("server-%d", expectedIdx); got.url.Host != want {
			t.Fatalf("unexpected backend, expected server-%d, got %s", expectedIdx, got.url.Host)
		}
	}

	f([]bool{false, false, false}, 0)
	f([]bool{true, true, false}, 2)

	// all the backends are broken - the first one must be returned
	f([]bool{true, true, true}, 0)
	f([]bool{true}, 0)
}
func getRegexs(paths []string) []*Regex {
var sps []*Regex
for _, path := range paths {
@@ -963,10 +1113,6 @@ func mustParseURLs(us []string) *URLPrefix {
return up
}
func intp(n int) *int {
return &n
}
func mustNewRegex(s string) *Regex {
var re Regex
if err := yaml.Unmarshal([]byte(s), &re); err != nil {

View File

@@ -116,6 +116,20 @@ users:
- "http://default1:8888/unsupported_url_handler"
- "http://default2:8888/unsupported_url_handler"
# A JWT token based routing:
# - Requests with a JWT token that has the following structure:
# {"team": "ops", "security": {"read_access": "1"}, "vm_access": {"metrics_account_id": 1000,"metrics_project_id":5}}
# are routed to vmselect nodes, and the {{.MetricsTenant}} placeholder in the request URL is replaced with the metrics tenant identifier
- name: jwt-opts-team
jwt:
match_claims:
team: ops
security.read_access: "1"
skip_verify: true
url_prefix:
- "http://vmselect1:8481/select/{{.MetricsTenant}}/prometheus"
- "http://vmselect2:8481/select/{{.MetricsTenant}}/prometheus"
# Requests without Authorization header are proxied according to `unauthorized_user` section.
# Requests are proxied in round-robin fashion between `url_prefix` backends.
# The deny_partial_response query arg is added to all the proxied requests.
@@ -125,3 +139,8 @@ unauthorized_user:
- http://vmselect-az1/?deny_partial_response=1
- http://vmselect-az2/?deny_partial_response=1
retry_status_codes: [503, 500]
# log access for requests routed to this user
access_log:
filters:
# except requests with Status Codes below
skip_status_codes: [200, 202]

486
app/vmauth/jwt.go Normal file
View File

@@ -0,0 +1,486 @@
package main
import (
"fmt"
"net/url"
"os"
"slices"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// Template-like placeholders recognized in the configuration of users.
// NOTE(review): the substitution logic is not visible in this part of the file;
// the per-placeholder semantics below are inferred from the names — confirm.
const (
// presumably replaced with the metrics tenant taken from the JWT token
metricsTenantPlaceholder = `{{.MetricsTenant}}`
metricsExtraLabelsPlaceholder = `{{.MetricsExtraLabels}}`
metricsExtraFiltersPlaceholder = `{{.MetricsExtraFilters}}`
logsAccountIDPlaceholder = `{{.LogsAccountID}}`
logsProjectIDPlaceholder = `{{.LogsProjectID}}`
logsExtraFiltersPlaceholder = `{{.LogsExtraFilters}}`
logsExtraStreamFiltersPlaceholder = `{{.LogsExtraStreamFilters}}`
// placeholderPrefix is the opening delimiter shared by all the placeholders above.
placeholderPrefix = `{{`
)
// allPlaceholders lists every placeholder constant declared above.
var allPlaceholders = []string{
metricsTenantPlaceholder,
metricsExtraLabelsPlaceholder,
metricsExtraFiltersPlaceholder,
logsAccountIDPlaceholder,
logsProjectIDPlaceholder,
logsExtraFiltersPlaceholder,
logsExtraStreamFiltersPlaceholder,
}
// urlPathPlaceHolders is the subset of placeholders holding tenant identifiers.
// NOTE(review): presumably these are the only placeholders allowed in the URL path — confirm against usage.
var urlPathPlaceHolders = []string{
metricsTenantPlaceholder,
logsAccountIDPlaceholder,
logsProjectIDPlaceholder,
}
// jwtCache groups the JWT-related state consulted by getJWTUserInfo.
type jwtCache struct {
// users contains the UserInfo entries from AuthConfig, which have JWTConfig set.
users []*UserInfo
// oidcDP is the pool of OIDC discoverers returned by parseJWTUsers together with users.
oidcDP *oidcDiscovererPool
}
// JWTConfig represents the `jwt` section of a user config.
//
// parseJWTUsers requires at least one of public_keys, public_key_files, oidc
// or skip_verify=true to be set, and rejects oidc combined with any of the other three.
type JWTConfig struct {
// PublicKeys contains public keys passed to jwt.ParseKey.
PublicKeys []string `yaml:"public_keys,omitempty"`
// PublicKeyFiles contains paths to files with public keys; the files are read by parseJWTUsers.
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
// SkipVerify makes getUserInfoByJWTToken accept tokens with matching claims without consulting verifierPool.
SkipVerify bool `yaml:"skip_verify,omitempty"`
// OIDC optionally configures the issuer used for keys discovery.
OIDC *oidcConfig `yaml:"oidc,omitempty"`
// MatchClaims maps claim keys to the values passed to jwt.NewClaim.
MatchClaims map[string]string `yaml:"match_claims,omitempty"`
// parsedMatchClaims holds MatchClaims parsed via jwt.NewClaim by parseJWTUsers.
parsedMatchClaims []*jwt.Claim
// verifierPool is used to verify JWT tokens.
// It is initialized from PublicKeys and/or PublicKeyFiles.
// In this case, it is initialized once at config reload and never updated until the next reload.
// In case of OIDC, it is initialized on config reload and periodically updated by the discovery process.
verifierPool atomic.Pointer[jwt.VerifierPool]
}
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
jui := make([]*UserInfo, 0, len(ac.Users))
oidcDP := &oidcDiscovererPool{}
uniqClaims := make(map[string]*UserInfo)
var sortedClaims []string
for idx, ui := range ac.Users {
jwtToken := ui.JWT
if jwtToken == nil {
continue
}
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
return nil, nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
}
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify && jwtToken.OIDC == nil {
return nil, nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true")
}
var claimsString string
sortedClaims = sortedClaims[:0]
parsedClaims := make([]*jwt.Claim, 0, len(jwtToken.MatchClaims))
for ck, cv := range jwtToken.MatchClaims {
sortedClaims = append(sortedClaims, fmt.Sprintf("%s=%s", ck, cv))
pc, err := jwt.NewClaim(ck, cv)
if err != nil {
return nil, nil, fmt.Errorf("incorrect match claim, key=%q, value regex=%q: %w", ck, cv, err)
}
parsedClaims = append(parsedClaims, pc)
}
ui.JWT.parsedMatchClaims = parsedClaims
sort.Strings(sortedClaims)
claimsString = strings.Join(sortedClaims, ",")
if oldUI, ok := uniqClaims[claimsString]; ok {
return nil, nil, fmt.Errorf("duplicate match claims=%q found for name=%q at idx=%d; the previous one is set for name=%q", claimsString, ui.Name, idx, oldUI.Name)
}
uniqClaims[claimsString] = &ui
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
for i := range jwtToken.PublicKeys {
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
if err != nil {
return nil, nil, err
}
keys = append(keys, k)
}
for _, filePath := range jwtToken.PublicKeyFiles {
keyData, err := os.ReadFile(filePath)
if err != nil {
return nil, nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
}
k, err := jwt.ParseKey(keyData)
if err != nil {
return nil, nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
}
keys = append(keys, k)
}
vp, err := jwt.NewVerifierPool(keys)
if err != nil {
return nil, nil, err
}
jwtToken.verifierPool.Store(vp)
}
if jwtToken.OIDC != nil {
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 || jwtToken.SkipVerify {
return nil, nil, fmt.Errorf("jwt with oidc cannot contain public keys or have skip_verify=true")
}
if jwtToken.OIDC.Issuer == "" {
return nil, nil, fmt.Errorf("oidc issuer cannot be empty")
}
isserURL, err := url.Parse(jwtToken.OIDC.Issuer)
if err != nil {
return nil, nil, fmt.Errorf("oidc issuer %q must be a valid URL", jwtToken.OIDC.Issuer)
}
if isserURL.Scheme != "https" && isserURL.Scheme != "http" {
return nil, nil, fmt.Errorf("oidc issuer %q must have http or https scheme", jwtToken.OIDC.Issuer)
}
oidcDP.createOrAdd(ui.JWT.OIDC.Issuer, &ui.JWT.verifierPool)
}
if err := parseJWTPlaceholdersForUserInfo(&ui, true); err != nil {
return nil, nil, err
}
if err := ui.initURLs(); err != nil {
return nil, nil, err
}
metricLabels, err := ui.getMetricLabels()
if err != nil {
return nil, nil, fmt.Errorf("cannot parse metric_labels: %w", err)
}
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests()
ui.concurrencyLimitCh = make(chan struct{}, mcr)
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
return float64(cap(ui.concurrencyLimitCh))
})
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
return float64(len(ui.concurrencyLimitCh))
})
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
if err != nil {
return nil, nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
}
ui.rt = rt
jui = append(jui, &ui)
}
// sort users by the number of match claims in descending order,
// so the more specific user wins when several users match the same token
sort.SliceStable(jui, func(i, j int) bool {
return len(jui[i].JWT.MatchClaims) > len(jui[j].JWT.MatchClaims)
})
return jui, oidcDP, nil
}
// tokenPool holds *jwt.Token objects for reuse; see getToken and putToken.
var tokenPool sync.Pool
// getToken returns a *jwt.Token obtained from tokenPool.
//
// A fresh zero-value token is allocated when the pool has nothing to hand out.
// Return the token to the pool via putToken when it is no longer needed.
func getToken() *jwt.Token {
	if pooled := tokenPool.Get(); pooled != nil {
		return pooled.(*jwt.Token)
	}
	return &jwt.Token{}
}
// putToken resets tkn and puts it into tokenPool, so it can be handed out by getToken again.
func putToken(tkn *jwt.Token) {
// Reset the token before pooling it, so the pool never holds parsed token data.
tkn.Reset()
tokenPool.Put(tkn)
}
// getJWTUserInfo tries to authorize the request by one of the auth tokens in ats
// against the JWT users stored in jwtAuthCache.
//
// Only tokens containing exactly two dots are considered. An optional `http_auth:` prefix
// is stripped before parsing. Tokens which cannot be parsed or are expired are skipped
// (and logged if -logInvalidAuthTokens is set).
//
// On success the matching user and the parsed token are returned;
// the caller owns the token and must release it with putToken.
// If no token matches, (nil, nil) is returned and the token is released here.
func getJWTUserInfo(ats []string) (*UserInfo, *jwt.Token) {
	cache := *jwtAuthCache.Load()
	if len(cache.users) == 0 {
		return nil, nil
	}

	tkn := getToken()
	for _, authToken := range ats {
		if strings.Count(authToken, ".") != 2 {
			continue
		}
		rawJWT := strings.TrimPrefix(authToken, `http_auth:`)

		// The same token object is reused for every candidate.
		tkn.Reset()
		err := tkn.Parse(rawJWT, true)
		switch {
		case err != nil:
			if *logInvalidAuthTokens {
				logger.Infof("cannot parse jwt token: %s", err)
			}
		case tkn.IsExpired(time.Now()):
			if *logInvalidAuthTokens {
				// TODO: add more context:
				// token claims with issuer
				logger.Infof("jwt token is expired")
			}
		default:
			if ui := getUserInfoByJWTToken(tkn, cache.users); ui != nil {
				return ui, tkn
			}
		}
	}

	putToken(tkn)
	return nil, nil
}
// getUserInfoByJWTToken returns the user from users which accepts tkn, or nil.
//
// users are checked in the given order. A user becomes a candidate when tkn matches
// its parsed match claims. The first candidate decides the result:
//
//   - it is returned immediately if it has skip_verify set;
//   - if it has an OIDC config, the token issuer must be non-empty and equal to the configured issuer;
//   - the token must pass verification by the user's verifier pool.
//
// If the first candidate rejects the token, nil is returned and the remaining users are not consulted.
//
// Rejection reasons are logged only when -logInvalidAuthTokens is set.
func getUserInfoByJWTToken(tkn *jwt.Token, users []*UserInfo) *UserInfo {
	for _, ui := range users {
		if !tkn.MatchClaims(ui.JWT.parsedMatchClaims) {
			continue
		}
		if ui.JWT.SkipVerify {
			return ui
		}

		if ui.JWT.OIDC != nil {
			// OIDC requires iss claim.
			// It must match the discovery issuer URL set in OIDC config.
			// https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata
			issuer := tkn.Issuer()
			if issuer == "" {
				if *logInvalidAuthTokens {
					logger.Infof("jwt token must have issuer field")
				}
				return nil
			}
			if issuer != ui.JWT.OIDC.Issuer {
				if *logInvalidAuthTokens {
					logger.Infof("jwt token issuer: %q does not match oidc issuer: %q", issuer, ui.JWT.OIDC.Issuer)
				}
				return nil
			}
		}

		vp := ui.JWT.verifierPool.Load()
		if vp == nil {
			// The verifier pool has not been stored for this user yet.
			if *logInvalidAuthTokens {
				logger.Infof("jwt verifier not initialized")
			}
			return nil
		}
		if err := vp.Verify(tkn); err != nil {
			if *logInvalidAuthTokens {
				logger.Infof("cannot verify jwt token: %s", err)
			}
			return nil
		}
		return ui
	}

	if *logInvalidAuthTokens {
		logger.Infof("no user match jwt token")
	}
	return nil
}
// replaceJWTPlaceholders substitutes JWT placeholders in the backend url bu and in the request headers of hc
// with the values taken from vma.
//
// It returns bu.url and hc unchanged if neither of them is marked as containing placeholders.
// Otherwise the returned url is a modified copy of bu.url (bu.url itself is not changed)
// and the returned HeadersConf carries a freshly built RequestHeaders slice.
//
// In query args a value equal to a placeholder is replaced by all the values of the corresponding claim;
// the replacements are appended after the remaining values of the same arg.
// In headers multiple claim values are joined with ",".
func replaceJWTPlaceholders(bu *backendURL, hc HeadersConf, vma *jwt.VMAccessClaim) (*url.URL, HeadersConf) {
	if !(bu.hasPlaceHolders || hc.hasAnyPlaceHolders) {
		return bu.url, hc
	}

	data := jwtClaimsData(vma)
	targetURL := bu.url

	if bu.hasPlaceHolders {
		// Work on a copy, so the configured url stays intact.
		uCopy := *bu.url

		// Template the request path.
		for _, ph := range urlPathPlaceHolders {
			uCopy.Path = strings.ReplaceAll(uCopy.Path, ph, data[ph][0])
		}

		// Template the query args.
		query := uCopy.Query()
		foundAnyQueryPlaceholder := false
		var templated []string
		for param, values := range query {
			// Keep the non-placeholder values in place and collect the replacements separately.
			// This may change the order of values for the given arg.
			kept := values[:0]
			templated = templated[:0]
			for _, v := range values {
				repl, isPlaceholder := data[v]
				if !isPlaceholder {
					kept = append(kept, v)
					continue
				}
				foundAnyQueryPlaceholder = true
				templated = append(templated, repl...)
			}
			query[param] = append(kept, templated...)
		}
		if foundAnyQueryPlaceholder {
			uCopy.RawQuery = query.Encode()
		}

		targetURL = &uCopy
	}

	if hc.hasAnyPlaceHolders {
		// Build a new headers slice; only headers with placeholder values get new Header objects.
		headers := make([]*Header, 0, len(hc.RequestHeaders))
		for _, h := range hc.RequestHeaders {
			if repl, ok := data[h.Value]; ok {
				h = &Header{
					Name:  h.Name,
					Value: strings.Join(repl, ","),
				}
			}
			headers = append(headers, h)
		}
		hc.RequestHeaders = headers
	}

	return targetURL, hc
}
// jwtClaimsData returns the mapping from every supported placeholder to its replacement values taken from vma.
//
// The metrics tenant is formatted as "accountID:projectID".
func jwtClaimsData(vma *jwt.VMAccessClaim) map[string][]string {
	// TODO: optimize at parsing stage
	metricsTenant := fmt.Sprintf("%d:%d", vma.MetricsAccountID, vma.MetricsProjectID)
	// TODO: optimize at parsing stage
	logsAccountID := fmt.Sprintf("%d", vma.LogsAccountID)
	logsProjectID := fmt.Sprintf("%d", vma.LogsProjectID)

	return map[string][]string{
		metricsTenantPlaceholder:          {metricsTenant},
		metricsExtraLabelsPlaceholder:     vma.MetricsExtraLabels,
		metricsExtraFiltersPlaceholder:    vma.MetricsExtraFilters,
		logsAccountIDPlaceholder:          {logsAccountID},
		logsProjectIDPlaceholder:          {logsProjectID},
		logsExtraFiltersPlaceholder:       vma.LogsExtraFilters,
		logsExtraStreamFiltersPlaceholder: vma.LogsExtraStreamFilters,
	}
}
// parseJWTPlaceholdersForUserInfo validates placeholders in all the urls and headers configured for ui.
//
// The checks run in the following order: url_prefix, headers, default_url and then every url_map entry.
// The first error stops the validation. isAllowed tells whether placeholders are permitted for ui at all.
func parseJWTPlaceholdersForUserInfo(ui *UserInfo, isAllowed bool) error {
	if up := ui.URLPrefix; up != nil {
		if err := validateJWTPlaceholdersForURL(up, isAllowed); err != nil {
			return err
		}
	}
	if err := parsePlaceholdersForHC(&ui.HeadersConf, isAllowed); err != nil {
		return err
	}
	if du := ui.DefaultURL; du != nil {
		if err := validateJWTPlaceholdersForURL(du, isAllowed); err != nil {
			return fmt.Errorf("invalid `default_url` placeholders: %w", err)
		}
	}

	for i := range ui.URLMaps {
		// Take the address of the entry, since parsePlaceholdersForHC may update its HeadersConf.
		um := &ui.URLMaps[i]
		if up := um.URLPrefix; up != nil {
			if err := validateJWTPlaceholdersForURL(up, isAllowed); err != nil {
				return fmt.Errorf("invalid `url_map` `url_prefix` placeholders: %w", err)
			}
		}
		if err := parsePlaceholdersForHC(&um.HeadersConf, isAllowed); err != nil {
			return fmt.Errorf("invalid `url_map` headers placeholders: %w", err)
		}
	}
	return nil
}
// validateJWTPlaceholdersForURL verifies placeholders in the request path and in the query args
// of every original backend url at up.
//
// If isAllowed is false, any occurrence of placeholderPrefix is an error.
// Otherwise the path may contain any number of entries from allPlaceholders mixed with other text,
// while a query arg value containing placeholderPrefix must be exactly equal to one of allPlaceholders.
func validateJWTPlaceholdersForURL(up *URLPrefix, isAllowed bool) error {
	supported := func() string {
		return strings.Join(allPlaceholders, ", ")
	}

	for _, bu := range up.busOriginal {
		if strings.Contains(bu.Path, placeholderPrefix) {
			if !isAllowed {
				return fmt.Errorf("placeholder: %q is only allowed at JWT token context", bu.Path)
			}
			// Drop all the known placeholders; anything which still looks like a placeholder is unsupported.
			rest := bu.Path
			for _, ph := range allPlaceholders {
				rest = strings.ReplaceAll(rest, ph, ``)
			}
			if strings.Contains(rest, placeholderPrefix) {
				return fmt.Errorf("invalid placeholder found in URL request path: %q, supported values are: %s", bu.Path, supported())
			}
		}

		for param, values := range bu.Query() {
			for _, value := range values {
				if !strings.Contains(value, placeholderPrefix) {
					continue
				}
				if !isAllowed {
					return fmt.Errorf("query param: %q with placeholder: %q is only allowed at JWT token context", param, value)
				}
				// possible placeholder
				if !slices.Contains(allPlaceholders, value) {
					return fmt.Errorf("query param: %q has unsupported placeholder string: %q, supported values are: %s", param, value, supported())
				}
			}
		}
	}
	return nil
}
// parsePlaceholdersForHC validates placeholders in the headers at hc.
//
// A request header value containing placeholderPrefix must be exactly equal to one of allPlaceholders
// and is accepted only if isAllowed is set; hc.hasAnyPlaceHolders is set to true for every such header.
// Response headers must not contain placeholderPrefix at all.
func parsePlaceholdersForHC(hc *HeadersConf, isAllowed bool) error {
	for _, h := range hc.RequestHeaders {
		if !strings.Contains(h.Value, placeholderPrefix) {
			continue
		}
		if !isAllowed {
			return fmt.Errorf("request header: %q placeholder: %q is only supported at JWT context", h.Name, h.Value)
		}
		if !slices.Contains(allPlaceholders, h.Value) {
			return fmt.Errorf("request header: %q has unsupported placeholder: %q, supported values are: %s", h.Name, h.Value, strings.Join(allPlaceholders, ", "))
		}
		hc.hasAnyPlaceHolders = true
	}

	for _, h := range hc.ResponseHeaders {
		if !strings.Contains(h.Value, placeholderPrefix) {
			continue
		}
		return fmt.Errorf("response header placeholders are not supported; found placeholder prefix at header: %q with value: %q", h.Name, h.Value)
	}
	return nil
}
// hasAnyPlaceholders reports whether u contains placeholderPrefix anywhere in its path
// or at the beginning of any query arg value.
func hasAnyPlaceholders(u *url.URL) bool {
	if strings.Contains(u.Path, placeholderPrefix) {
		return true
	}
	// u.Query() parses the raw query on every call, so call it only once.
	// Ranging over an empty result is a no-op, so no separate emptiness check is needed.
	for _, values := range u.Query() {
		for _, value := range values {
			if strings.HasPrefix(value, placeholderPrefix) {
				return true
			}
		}
	}
	return false
}

503
app/vmauth/jwt_test.go Normal file
View File

@@ -0,0 +1,503 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
)
// TestJWTParseAuthConfigFailure verifies that invalid jwt sections in the auth config are rejected
// with the expected error message either by parseAuthConfig or by parseJWTUsers.
func TestJWTParseAuthConfigFailure(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
// f parses the config s and expects the error expErr.
// The error may come from parseAuthConfig; otherwise it must come from parseJWTUsers,
// which must also return nil oidcDP in this case.
f := func(s string, expErr string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
if expErr != err.Error() {
t.Fatalf("unexpected error; got\n%q\nwant\n%q", err.Error(), expErr)
}
return
}
users, oidcDP, err := parseJWTUsers(ac)
if err == nil {
t.Fatalf("expecting non-nil error; got %v", users)
}
if expErr != err.Error() {
t.Fatalf("unexpected error; got\n%q\nwant \n%q", err.Error(), expErr)
}
if oidcDP != nil {
t.Fatalf("expecting nil oidcDP; got %v", oidcDP)
}
}
// unauthorized_user cannot be used with jwt
f(`
unauthorized_user:
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `field jwt can't be specified for unauthorized_user section`)
// username and jwt in a single config
f(`
users:
- username: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// bearer_token and jwt in a single config
f(`
users:
- bearer_token: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// auth_token and jwt in a single config
f(`
users:
- auth_token: "Foo token"
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// jwt public_keys or skip_verify must be set, part 1
f(`
users:
- jwt: {}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 2
f(`
users:
- jwt: {public_keys: null}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 3
f(`
users:
- jwt: {public_keys: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
// jwt public_keys, public_key_files or skip_verify must be set
f(`
users:
- jwt: {public_key_files: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
// invalid public key, part 1
f(`
users:
- jwt: {public_keys: [""]}
url_prefix: http://foo.bar
`, `failed to parse key "": failed to decode PEM block containing public key`)
// invalid public key, part 2
f(`
users:
- jwt: {public_keys: ["invalid"]}
url_prefix: http://foo.bar
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
// invalid public key, part 3: an invalid key listed after two valid ones
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
- "invalid"
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
// several jwt users with identical (empty) match claims are rejected as duplicates
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `duplicate match claims="" found for name="" at idx=1; the previous one is set for name=""`)
// public key file doesn't exist
f(`
users:
- jwt:
public_key_files:
- /path/to/nonexistent/file.pem
url_prefix: http://foo.bar
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
// public key file with contents, which cannot be parsed as a public key
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
t.Fatalf("failed to write public key file: %s", err)
}
f(`
users:
- jwt:
public_key_files:
- `+publicKeyFile+`
url_prefix: http://foo.bar
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
// unsupported placeholder in the url_prefix request path
f(`
users:
- jwt:
skip_verify: true
url_prefix: http://foo.bar/{{.UnsupportedPlaceholder}}/foo`,
"invalid placeholder found in URL request path: \"/{{.UnsupportedPlaceholder}}/foo\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
)
// unsupported placeholder in a header
f(`
users:
- jwt:
skip_verify: true
headers:
- "AccountID: {{.UnsupportedPlaceholder}}"
url_prefix: http://foo.bar
`,
"request header: \"AccountID\" has unsupported placeholder: \"{{.UnsupportedPlaceholder}}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
)
// spaces in templating not allowed
f(`
users:
- jwt:
skip_verify: true
headers:
- "AccountID: {{ .LogsAccountID }}"
url_prefix: http://foo.bar
`,
"request header: \"AccountID\" has unsupported placeholder: \"{{ .LogsAccountID }}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
)
// oidc is not an object
f(`
users:
- jwt:
oidc: "not an object"
url_prefix: http://foo.bar
`,
"cannot unmarshal AuthConfig data: yaml: unmarshal errors:\n line 4: cannot unmarshal !!str `not an ...` into main.oidcConfig",
)
// oidc issuer empty
f(`
users:
- jwt:
oidc: {}
url_prefix: http://foo.bar
`,
"oidc issuer cannot be empty",
)
// oidc issuer is not a parseable url
f(`
users:
- jwt:
oidc:
issuer: "::invalid-url"
url_prefix: http://foo.bar
`,
"oidc issuer \"::invalid-url\" must be a valid URL",
)
// oidc issuer without http or https scheme
f(`
users:
- jwt:
oidc:
issuer: "invalid-url"
url_prefix: http://foo.bar
`,
"oidc issuer \"invalid-url\" must have http or https scheme",
)
// oidc and public_keys are not allowed
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
oidc:
issuer: https://example.com
url_prefix: http://foo.bar
`, validRSAPublicKey),
"jwt with oidc cannot contain public keys or have skip_verify=true",
)
// oidc and skip_verify are not allowed
f(`
users:
- jwt:
skip_verify: true
oidc:
issuer: https://example.com
url_prefix: http://foo.bar
`,
"jwt with oidc cannot contain public keys or have skip_verify=true",
)
// duplicate claims
f(`
users:
- jwt:
skip_verify: true
match_claims:
team: ops
name: user-1
url_prefix: http://foo.bar
- jwt:
skip_verify: true
match_claims:
team: ops
name: user-2
url_prefix: http://foo.bar`,
"duplicate match claims=\"team=ops\" found for name=\"user-2\" at idx=1; the previous one is set for name=\"user-1\"",
)
}
// TestJWTParseAuthConfigSuccess verifies that valid jwt sections in the auth config are accepted
// by parseAuthConfig and parseJWTUsers, and that the verifier pool is set exactly for the users
// without skip_verify.
func TestJWTParseAuthConfigSuccess(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
// f parses the config s, starts OIDC discovery for it and checks every returned jwt user:
// users with skip_verify=true must have no verifier pool, all the other users must have one.
f := func(s string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
jui, oidcDP, err := parseJWTUsers(ac)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
oidcDP.startDiscovery()
defer oidcDP.stopDiscovery()
for _, ui := range jui {
if ui.JWT == nil {
t.Fatalf("unexpected nil JWTConfig")
}
if ui.JWT.SkipVerify {
if ui.JWT.verifierPool.Load() != nil {
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
}
continue
}
if ui.JWT.verifierPool.Load() == nil {
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
}
}
}
// single inline RSA public key
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey))
// single inline ECDSA public key
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey))
// multiple inline public keys
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey))
// skip_verify without any keys
f(`
users:
- jwt:
skip_verify: true
url_prefix: http://foo.bar
`)
// combined with other auth methods
f(`
users:
- username: foo
password: bar
url_prefix: http://foo.bar
- jwt:
skip_verify: true
url_prefix: http://foo.bar
- bearer_token: foo
url_prefix: http://foo.bar
`)
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write RSA key file: %s", err)
}
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write ECDSA key file: %s", err)
}
// Test single public key file
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
url_prefix: http://foo.bar
`, rsaKeyFile))
// Test multiple public key files
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
- %q
url_prefix: http://foo.bar
`, rsaKeyFile, ecdsaKeyFile))
// Test combined inline keys and files
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
public_key_files:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey, rsaKeyFile))
// oidc stub server
// It serves the discovery document at /.well-known/openid-configuration and the key set at /jwks.
// ipSrv is declared before the assignment, so the handler can refer to ipSrv.URL.
var ipSrv *httptest.Server
ipSrv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/.well-known/openid-configuration" {
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]string{
"issuer": ipSrv.URL,
"jwks_uri": fmt.Sprintf("%s/jwks", ipSrv.URL),
})
return
}
if r.URL.Path == "/jwks" {
// resp generated by https://jwkset.com/generate
w.Header().Set("Content-Type", "application/json")
w.Write([]byte(`
{
"keys": [
{
"kty": "RSA",
"kid": "f13eee91-f566-4829-80fa-fca847c21f0e",
"d": "Ua1llEFz3LZ05CrK5a2JxKMUEWJGXhBPPF20hHQjzxd1w0IEJK_mhPZQG8dNtBROBNIi1FC9l6QRw-RTnVIVat5Xy4yDFNKXXL3ZLXejOHY8SXrNEIDqQ-cSwIpK9cK7Umib0PcPeEeeAED5mqDH75D8_YssWFF18kLbNB5Z9pZmn6Fshiht7l2Sh4GN-KcReOW6eiQQwckDte3OGmZCRbtEriLWJt5TUGUvfZVIlcclqNMycNB6jGa9E1pO5Up7Ki3ZbI_-6XmRgZPtqnR9oLJ1zn3fj3hYpCXo-zcqLuOu3qxcslsq5igsfBzgGtfIJHY9LfWmHUsaDEa5cAX1gQ",
"n": "xbLXXBTNREk70UCMiqZ53_mTzYh89W-UaPU61GZ-RZ5lYcLgyWOb5mdyRbvJpcgfZpsOeGAUWbk3GkQ4vqn8kUMnnWhUum2Qk9kGubOJGLW6yaURd00j3E-ilQ5xO2R_Hzz8bAojxV8GKdGTQ-iTf8z8nsSHH8kR2SERbNJCFFtwtFU7vyFWyoH4Lmvu2UpICTHFCR9RqwQVjyoKB1JjJ6Dh1L4zPTlsvQEnqoeFQHPYr0QcQSMYXdfPvlt_FiLOAOE89fX_9T2r9WbFAoda3uTRE5_aal0jxUU2cFyeVSIgauNtF07fp422XFb4XPkWQWrdNx0KX53laSIYQ9HOpw",
"e": "AQAB",
"p": "2JT57AD-Q2lamgjgyn0wL7DgYZ3OoCTTrDm5_NHg6h13uDvyIlXSukuUeWm4tzPSDedpstbS7dgXkLw5eQXBHwPYtByTcEZS8Z37CBnhMOOhfo_U1aNIPPanJACvWBgz47-TxHsxW1YhztZqghRoicBZPSSBAj49MgANJ4jF0zc",
"q": "6a4MkeSXJI-ZzQ-bgP8hwJqpLFr0AiNGQcjZMH4Nn4CPGdnGiqqe6flhfLimgbNhbb67B0-8fLIji8zGhGKDL_JSIpAAdmfs2vzeEsY2hScrqVbd1VbfRcRh0J6lsn7obxkbvQthp9sX2DQbeDcEeaFEvd9gDKQSATYEqWo7eBE",
"dp": "haL2yu6Z9RJuuxi7S3YPY33qFZF_y0St71j3L854zzw7gMxMTW9TRWwZQwk-1pv9AmNFzvnK0MNDVyUs-UXZsb932TrApshdqYRnPsppLvdl0GgDVYcYrbUr0IUzrFHSwraVAOlavRbaaXvX4EejcUvkRFvf1nh83fs2Iqy8E-U",
"dq": "Cnf5qC-Ndd3ZDg688LJ9WJuVKJ-Kfu4Fn7zXvgxnn9Wqk4XmFyA9rk21yFidXQIkQz5gMpun3g48-W5bFmMzbVp1w4af_q35NnZNnJm0p5Jxqkxx87TIm9-IYkg5NB3rW87MJ1PzNAnkr5LmCCSu1qQa6Eaxjt9qzxMUcmKH94E",
"qi": "saAeU11iaKHmye3cwCAYkegcyWbXV3xIXEVJtS9Af_yM19UhspwY2VhuwRaajcwYZwtvR9_ITmX9M-ea7uLdd7aDYO1fujC8NGbopeC4Hkr7yb5vTly3pfKf4h-3LwGGUucJUetdz1lmMIYiyuG4_gSf1yIEtPDLKzXiedgEMdI"
}
]
}
`))
return
}
http.NotFound(w, r)
}))
defer ipSrv.Close()
// oidc with the issuer pointing to the stub server
f(`
users:
- jwt:
oidc:
issuer: ` + ipSrv.URL + `
url_prefix: http://foo.bar
`)
// multiple match claims
f(fmt.Sprintf(`
users:
- jwt:
match_claims:
role: ro
team: dev
public_keys:
- %q
url_prefix: http://foo.bar
- jwt:
match_claims:
role: admin
team: dev
public_key_files:
- %q
- %q
url_prefix: http://foo.bar
- jwt:
match_claims:
role: viewer
team: dev
department: ceo
skip_verify: true
url_prefix: http://foo.bar
`, validRSAPublicKey, rsaKeyFile, ecdsaKeyFile))
}

View File

@@ -16,6 +16,7 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
@@ -24,6 +25,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -40,27 +42,38 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
"See also -maxConcurrentRequests")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
"'429 Too Many Requests' http status code. See also -maxQueueDuration, -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxQueueDuration and -maxConcurrentRequests command-line options "+
"and max_concurrent_requests option in per-user config")
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration the request waits for execution when the number of concurrently executed "+
"requests reach -maxConcurrentRequests or -maxConcurrentPerUserRequests before returning '429 Too Many Requests' error. "+
"This allows graceful handling of short spikes in the number of concurrent requests")
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
"This allows reducing the consumption of backend resources when processing requests from clients connected via slow networks. "+
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables retries. "+
"See also -requestBufferSize")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
@@ -161,7 +174,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
// Process requests for unauthorized users
ui := authConfig.Load().UnauthorizedUser
if ui != nil {
processUserRequest(w, r, ui)
processUserRequest(w, r, ui, nil)
return true
}
@@ -169,29 +182,36 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
ui := getUserInfoByAuthTokens(ats)
if ui == nil {
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu)
return true
}
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
} else {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
if ui := getUserInfoByAuthTokens(ats); ui != nil {
processUserRequest(w, r, ui, nil)
return true
}
if ui, tkn := getJWTUserInfo(ats); ui != nil {
if tkn == nil {
logger.Panicf("BUG: unexpected nil jwt token for user %q", ui.name())
}
defer putToken(tkn)
processUserRequest(w, r, ui, tkn)
return true
}
processUserRequest(w, r, ui)
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu, nil)
return true
}
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
} else {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
}
return true
}
@@ -206,7 +226,37 @@ func getUserInfoByAuthTokens(ats []string) *UserInfo {
return nil
}
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
// responseWriterWithStatus is a wrapper around http.ResponseWriter that captures the status code written to the response.
type responseWriterWithStatus struct {
http.ResponseWriter
// status is the status code recorded by WriteHeader.
status int
}
// WriteHeader records the status so it can be easily retrieved later,
// and then passes it to the wrapped http.ResponseWriter.
func (rws *responseWriterWithStatus) WriteHeader(status int) {
rws.status = status
rws.ResponseWriter.WriteHeader(status)
}
// Flush implements net/http.Flusher interface by delegating to the wrapped http.ResponseWriter.
//
// This is needed for the copyStreamToClient()
func (rws *responseWriterWithStatus) Flush() {
	inner := rws.ResponseWriter
	flusher, ok := inner.(http.Flusher)
	if !ok {
		logger.Panicf("BUG: it is expected http.ResponseWriter (%T) supports http.Flusher interface", inner)
	}
	flusher.Flush()
}
// Unwrap returns the original ResponseWriter wrapped by rws.
//
// This is needed for the net/http.ResponseController - see https://pkg.go.dev/net/http#NewResponseController
func (rws *responseWriterWithStatus) Unwrap() http.ResponseWriter {
return rws.ResponseWriter
}
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
startTime := time.Now()
defer ui.requestsDuration.UpdateDuration(startTime)
@@ -215,49 +265,133 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
defer cancel()
// Limit the concurrency of requests to backends
userName := ui.name()
if userName == "" {
userName = "unauthorized"
}
if ui.AccessLog != nil {
w = &responseWriterWithStatus{ResponseWriter: w}
defer func() {
rws := w.(*responseWriterWithStatus)
duration := time.Since(startTime)
ui.logRequest(r, userName, rws.status, duration)
}()
}
// Acquire global concurrency limit.
if err := beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer endConcurrencyLimit()
// Set read deadline for reading the initial chunk for the request body.
rc := http.NewResponseController(w)
deadline, ok := ctx.Deadline()
if !ok {
logger.Panicf("BUG: expecting valid deadline for the context")
}
if err := rc.SetReadDeadline(deadline); err != nil {
logger.Panicf("BUG: cannot set read deadline: %s", err)
}
// Read the initial chunk for the request body.
bb, err := bufferRequestBody(ctx, r.Body, userName)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
r.Body = bb
// Disable the read deadline for the rest of the request body.
if err := rc.SetReadDeadline(time.Time{}); err != nil {
logger.Panicf("BUG: cannot reset read deadline: %s", err)
}
// Acquire concurrency limit for the given user.
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer ui.endConcurrencyLimit()
// Process the request.
processRequest(w, r, ui, tkn)
}
func beginConcurrencyLimit(ctx context.Context) error {
concurrencyLimitOnce.Do(concurrencyLimitInit)
select {
case concurrencyLimitCh <- struct{}{}:
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
<-concurrencyLimitCh
return
}
return nil
default:
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
// so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case concurrencyLimitCh <- struct{}{}:
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
<-concurrencyLimitCh
return
}
return nil
case <-ctx.Done():
err := ctx.Err()
concurrentRequestsLimitReached.Inc()
if errors.Is(err, context.DeadlineExceeded) {
err = fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
// The current request couldn't be executed until the request timeout.
concurrentRequestsLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
*maxQueueDuration, cap(concurrencyLimitCh))
handleConcurrencyLimitError(w, r, err)
return
}
err = fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
handleConcurrencyLimitError(w, r, err)
return
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
}
}
processRequest(w, r, ui)
ui.endConcurrencyLimit()
}
// endConcurrencyLimit releases the global concurrency slot by receiving a single item from concurrencyLimitCh.
//
// processUserRequest defers this call only after beginConcurrencyLimit returned nil.
func endConcurrencyLimit() {
<-concurrencyLimitCh
}
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
// bufferRequestBody reads up to max(requestBufferSize, maxRequestBodySizeToRetry) bytes from r into memory
// and returns the result of newBufferedBody built from r and the read bytes.
//
// It returns (nil, nil) if r is nil, and returns r unchanged if the calculated buffer size isn't positive.
//
// On read failure it returns *httpserver.ErrorWithStatusCode with http.StatusBadRequest.
// If ctx deadline is exceeded at that moment, the request is counted in rejectSlowClientRequests.
// userName is used only in the error message.
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
if r == nil {
// This is a GET request with nil reader.
return nil, nil
}
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
if maxBufSize <= 0 {
// Request buffering is disabled.
return r, nil
}
// Read no more than maxBufSize bytes from r; the rest of the body is left unread in r.
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
defer ioutil.PutLimitedReader(lr)
start := time.Now()
buf, err := io.ReadAll(lr)
// The duration is registered for both successful and failed reads.
bufferRequestBodyDuration.UpdateDuration(start)
if err != nil {
// Check ctx instead of err: processUserRequest sets the read deadline to the ctx deadline before calling this function.
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
rejectSlowClientRequests.Inc()
d := time.Since(start)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
StatusCode: http.StatusBadRequest,
}
}
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot read request body: %w", err),
StatusCode: http.StatusBadRequest,
}
}
bb := newBufferedBody(r, buf, maxBufSize)
return bb, nil
}
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
u := normalizeURL(r.URL)
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
isDefault := false
@@ -282,28 +416,31 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
isDefault = true
}
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
r.Body = rtb
maxAttempts := up.getBackendsCount()
for i := 0; i < maxAttempts; i++ {
for range maxAttempts {
bu := up.getBackendURL()
if bu == nil {
break
}
targetURL := bu.url
// Don't change path and add request_path query param for default route.
if tkn != nil {
// for security reasons allow templating only for configured url values and headers
targetURL, hc = replaceJWTPlaceholders(bu, hc, tkn.VMAccess())
}
if isDefault {
// Don't change path and add request_path query param for default route.
targetURLCopy := *targetURL
query := targetURL.Query()
query.Set("request_path", u.String())
targetURL.RawQuery = query.Encode()
} else { // Update path for regular routes.
targetURLCopy.RawQuery = query.Encode()
targetURL = &targetURLCopy
} else {
// Update path for regular routes.
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
}
wasLocalRetry := false
again:
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
if needLocalRetry && !wasLocalRetry {
wasLocalRetry = true
goto again
@@ -313,18 +450,19 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
if ok {
return
}
bu.setBroken()
ui.backendErrors.Inc()
}
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()),
StatusCode: http.StatusBadGateway,
}
httpserver.Errorf(w, r, "%s", err)
ui.requestErrors.Inc()
}
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
ui.backendRequests.Inc()
req := sanitizeRequestHeaders(r)
@@ -339,30 +477,22 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
}
}
rtb, rtbOK := req.Body.(*readTrackingBody)
res, err := ui.rt.RoundTrip(req)
bb, bbOK := req.Body.(*bufferedBody)
canRetry := !bbOK || bb.canRetry()
if ctxErr := r.Context().Err(); ctxErr != nil {
// Override the error returned by the RoundTrip with the context error if it isn't non-nil
// This makes sure the proper logging for canceled and timed out requests - log the real cause of the error
// instead of the random error, which could be returned from RoundTrip because of canceled or timed out request.
err = ctxErr
res, err := ui.rt.RoundTrip(req)
if err == nil {
defer func() { _ = res.Body.Close() }()
}
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil {
// Do not retry canceled
if errors.Is(err, context.Canceled) {
clientCanceledRequests.Inc()
return true, false
}
// Do not retry timed out requests
if errors.Is(err, context.DeadlineExceeded) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
// Timed out request must be counted as errors, since this usually means that the backend is slow.
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
return false, false
}
if !rtbOK || !rtb.canRetry() {
if !canRetry {
// Request body cannot be re-sent to another backend. Return the error to the client then.
err = &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
@@ -371,27 +501,32 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc()
ui.requestErrors.Inc()
bu.setBroken()
return true, false
}
if netutil.IsTrivialNetworkError(err) {
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
if bbOK {
bb.resetReader()
}
return false, true
}
// Request body wasn't read yet, this usually means that the backend isn't reachable; retry the request at another backend
// Retry the request at another backend
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, req.URL, targetURL, err)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
if bbOK {
bb.resetReader()
}
return false, false
}
if slices.Contains(retryStatusCodes, res.StatusCode) {
_ = res.Body.Close()
if !rtbOK || !rtb.canRetry() {
if !canRetry {
// If we get an error from the retry_status_codes list, but cannot execute retry,
// we consider such a request an error as well.
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request has been already consumed",
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
res.StatusCode, targetURL),
StatusCode: http.StatusServiceUnavailable,
}
@@ -400,13 +535,16 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
ui.requestErrors.Inc()
return true, false
}
// Retry requests at other backends if it matches retryStatusCodes.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
if bbOK {
bb.resetReader()
}
return false, false
}
removeHopHeaders(res.Header)
@@ -415,14 +553,16 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
w.WriteHeader(res.StatusCode)
err = copyStreamToClient(w, res.Body)
_ = res.Body.Close()
if errors.Is(err, context.Canceled) {
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
} else if err != nil && !netutil.IsTrivialNetworkError(err) {
}
if err != nil && !netutil.IsTrivialNetworkError(err) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.requestErrors.Inc()
return true, false
@@ -553,6 +693,9 @@ var (
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
)
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
@@ -623,7 +766,7 @@ var concurrentRequestsLimitReached = metrics.NewCounter("vmauth_concurrent_reque
func usage() {
const s = `
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics components or any other HTTP backends.
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
`
@@ -636,8 +779,7 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
}
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
ctx := r.Context()
if errors.Is(ctx.Err(), context.Canceled) {
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not return any response for the request canceled by the client,
// since the connection to the client is already closed.
clientCanceledRequests.Inc()
@@ -652,123 +794,83 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
httpserver.Errorf(w, r, "%s", err)
}
// readTrackingBody must be obtained via getReadTrackingBody()
type readTrackingBody struct {
// maxBodySize is the maximum body size to cache in buf.
// bufferedBody serves two purposes:
//
// 1. It enables request retries when the request body size does not exceed maxBufSize
// by fully buffering the request body in memory.
// 2. It prevents slow clients from reducing effective server capacity
// by buffering the request body before acquiring a per-user concurrency slot.
//
// See bufferRequestBody for details on how bufferedBody is used.
type bufferedBody struct {
// r contains reader for reading the data after buf is read.
//
// Bigger bodies cannot be retried.
maxBodySize int
// r contains reader for initial data reading
// r is nil if buf contains all the data.
r io.ReadCloser
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
// If more than maxBodySize is read from r, then cannotRetry is set to true.
// buf contains the initial buffer read from r.
buf []byte
// readBuf points to the cached data at buf, which must be read in the next call to Read().
readBuf []byte
// bufOffset is the offset at buf for already read bytes.
bufOffset int
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
// cannotRetry is set to true after Close() call on non-nil r.
cannotRetry bool
// bufComplete is set to true when buf contains complete request body read from r.
bufComplete bool
}
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
// do not use sync.Pool there
// since http.RoundTrip may still use request body after return
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
rtb := &readTrackingBody{}
if maxBodySize < 0 {
maxBodySize = 0
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
if len(buf) < maxBufSize {
// The full request body has been already read into buf.
r = nil
}
rtb.maxBodySize = maxBodySize
if r == nil {
// This is GET request without request body
r = (*zeroReader)(nil)
return &bufferedBody{
r: r,
buf: buf,
}
rtb.r = r
return rtb
}
type zeroReader struct{}
func (r *zeroReader) Read(_ []byte) (int, error) {
return 0, io.EOF
}
func (r *zeroReader) Close() error {
return nil
}
// Read implements io.Reader interface.
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
if len(rtb.readBuf) > 0 {
n := copy(p, rtb.readBuf)
rtb.readBuf = rtb.readBuf[n:]
func (bb *bufferedBody) Read(p []byte) (int, error) {
if bb.cannotRetry {
return 0, fmt.Errorf("cannot read already closed request body")
}
if bb.bufOffset < len(bb.buf) {
n := copy(p, bb.buf[bb.bufOffset:])
bb.bufOffset += n
return n, nil
}
if rtb.r == nil {
if rtb.bufComplete {
return 0, io.EOF
}
return 0, fmt.Errorf("cannot read client request body after closing client reader")
if bb.r == nil {
return 0, io.EOF
}
n, err := rtb.r.Read(p)
if rtb.cannotRetry {
return n, err
}
if len(rtb.buf)+n > rtb.maxBodySize {
rtb.cannotRetry = true
return n, err
}
rtb.buf = append(rtb.buf, p[:n]...)
if err == io.EOF {
rtb.bufComplete = true
}
return n, err
return bb.r.Read(p)
}
func (rtb *readTrackingBody) canRetry() bool {
if rtb.cannotRetry {
func (bb *bufferedBody) canRetry() bool {
if bb.r != nil {
return false
}
if rtb.bufComplete {
return true
}
return rtb.r != nil
maxRetrySize := maxRequestBodySizeToRetry.IntN()
return len(bb.buf) == 0 || (maxRetrySize > 0 && len(bb.buf) <= maxRetrySize)
}
// Close implements io.Closer interface.
func (rtb *readTrackingBody) Close() error {
if !rtb.cannotRetry {
rtb.readBuf = rtb.buf
} else {
rtb.readBuf = nil
func (bb *bufferedBody) Close() error {
bb.resetReader()
bb.cannotRetry = !bb.canRetry()
if bb.r != nil {
return bb.r.Close()
}
// Close rtb.r only if the request body is completely read or if it is too big.
// http.Roundtrip performs body.Close call even without any Read calls,
// so this hack allows us to reuse request body.
if rtb.bufComplete || rtb.cannotRetry {
if rtb.r == nil {
return nil
}
err := rtb.r.Close()
rtb.r = nil
return err
}
return nil
}
// resetReader rewinds the read offset to the start of bb.buf,
// so the buffered data can be read again from the beginning.
func (bb *bufferedBody) resetReader() {
bb.bufOffset = 0
}
func debugInfo(u *url.URL, r *http.Request) string {
s := &strings.Builder{}
fmt.Fprintf(s, " (host: %q; ", r.Host)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,194 @@
package main
import (
"crypto"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"encoding/base64"
"encoding/json"
"encoding/pem"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// BenchmarkJWTRequestHandler measures requestHandlerWithInternalRoutes for requests
// authenticated with RS256-signed JWT tokens.
//
// Every case loads its own auth config, runs the handler in parallel
// and verifies the response status code recorded by fakeResponseWriter.
func BenchmarkJWTRequestHandler(b *testing.B) {
	// Generate RSA key pair for testing
	privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		b.Fatalf("cannot generate RSA key: %s", err)
	}

	// Generate public key PEM
	publicKeyBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey)
	if err != nil {
		b.Fatalf("cannot marshal public key: %s", err)
	}
	publicKeyPEM := pem.EncodeToMemory(&pem.Block{
		Type:  "PUBLIC KEY",
		Bytes: publicKeyBytes,
	})

	// genToken builds a compact JWT with the given claims.
	// The token is signed with privateKey if valid is set; otherwise it gets a bogus signature.
	genToken := func(t *testing.B, body map[string]any, valid bool) string {
		t.Helper()

		headerJSON, err := json.Marshal(map[string]any{
			"alg": "RS256",
			"typ": "JWT",
		})
		if err != nil {
			t.Fatalf("cannot marshal header: %s", err)
		}
		headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)

		bodyJSON, err := json.Marshal(body)
		if err != nil {
			t.Fatalf("cannot marshal body: %s", err)
		}
		bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)

		payload := headerB64 + "." + bodyB64

		var signatureB64 string
		if valid {
			// Create real RSA signature
			hash := crypto.SHA256
			h := hash.New()
			h.Write([]byte(payload))
			digest := h.Sum(nil)

			signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
			if err != nil {
				t.Fatalf("cannot sign token: %s", err)
			}
			signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
		} else {
			signatureB64 = base64.RawURLEncoding.EncodeToString([]byte("invalid_signature"))
		}

		return payload + "." + signatureB64
	}

	// f runs a single sub-benchmark named name with the auth config cfgStr,
	// where {BACKEND} is substituted with the URL of a temporary test backend.
	f := func(name string, cfgStr string, r *http.Request, statusCodeExpected int) {
		b.Helper()

		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusOK)
			if _, err := w.Write([]byte("path: " + r.URL.Path + "\n")); err != nil {
				panic(fmt.Errorf("cannot write response: %w", err))
			}
		}))
		defer ts.Close()

		cfgStr = strings.ReplaceAll(cfgStr, "{BACKEND}", ts.URL)

		cfgOrigP := authConfigData.Load()
		if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
			b.Fatalf("cannot load config data: %s", err)
		}
		// Restore the original config after the sub-benchmark is finished.
		defer func() {
			cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
			if cfgOrigP != nil {
				cfgOrig = *cfgOrigP
			}
			_, err := reloadAuthConfigData(cfgOrig)
			if err != nil {
				b.Fatalf("cannot load the original config: %s", err)
			}
		}()

		b.Run(name, func(b *testing.B) {
			// Allocation reporting and the timer reset are applied to the sub-benchmark only,
			// since the parent benchmark doesn't report its own results.
			b.ReportAllocs()
			b.ResetTimer()
			b.RunParallel(func(pb *testing.PB) {
				// The handler mutates the request (for instance, it replaces r.Body),
				// so every worker goroutine must use its own copy in order to avoid data races.
				req := r.Clone(r.Context())
				w := &fakeResponseWriter{}
				for pb.Next() {
					w.reset()
					// Fatalf must not be called from goroutines other than the one running the benchmark,
					// so report the failure with Errorf and stop the current worker.
					if !requestHandlerWithInternalRoutes(w, req) {
						b.Errorf("unexpected false is returned from requestHandler")
						return
					}
					if w.statusCode != statusCodeExpected {
						b.Errorf("unexpected response code (-%d;+%d)", statusCodeExpected, w.statusCode)
						return
					}
				}
			})
		})
	}

	simpleCfgStr := fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))

	noVMAccessClaimToken := genToken(b, nil, true)
	expiredToken := genToken(b, map[string]any{
		"exp":       10,
		"vm_access": map[string]any{},
	}, true)
	fullToken := genToken(b, map[string]any{
		"exp":   time.Now().Add(10 * time.Minute).Unix(),
		"scope": "email id",
		"vm_access": map[string]any{
			"extra_labels": map[string]string{
				"label":  "value1",
				"label2": "value3",
			},
			"extra_filters":      []string{"stream_filter1", "stream_filter2"},
			"metrics_account_id": 123,
			"metrics_project_id": 234,
			"metrics_extra_labels": []string{
				"label1=value1",
				"label2=value2",
			},
			"metrics_extra_filters": []string{
				`{label3="value3"}`,
				`{label4="value4"}`,
			},
			"logs_account_id": 345,
			"logs_project_id": 456,
			"logs_extra_filters": []string{
				`{"namespace":"my-app","env":"prod"}`,
			},
			"logs_extra_stream_filters": []string{
				`{"team":"dev"}`,
			},
		},
	}, true)

	// tenant headers are overwritten if set as placeholders
	// extra_filters extra_stream_filters from vm_access claim merged with statically defined
	request := httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
	request.Header.Set(`Authorization`, `Bearer `+fullToken)
	f("full_template",
		fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
headers:
- "AccountID: {{.LogsAccountID}}"
- "ProjectID: {{.LogsProjectID}}"
url_prefix: {BACKEND}/select/logsql/?extra_filters=aStaticFilter&extra_stream_filters=aStaticStreamFilter&extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
		request,
		http.StatusOK,
	)

	// token without vm_access claim
	request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
	request.Header.Set(`Authorization`, `Bearer `+noVMAccessClaimToken)
	f("token_without_claim", simpleCfgStr, request, http.StatusUnauthorized)

	// expired token
	request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
	request.Header.Set(`Authorization`, `Bearer `+expiredToken)
	f("expired_token", simpleCfgStr, request, http.StatusUnauthorized)
}

195
app/vmauth/oidc.go Normal file
View File

@@ -0,0 +1,195 @@
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
// oidcConfig contains OIDC settings parsed from YAML.
type oidcConfig struct {
// Issuer is read from the `issuer` YAML key.
// NOTE(review): presumably this is the issuer URL passed to oidcDiscovererPool.createOrAdd - confirm at the caller.
Issuer string `yaml:"issuer"`
}
// oidcDiscovererPool groups oidcDiscoverer instances by issuer and controls their background goroutines.
//
// The zero value is ready for use: createOrAdd initializes the fields on the first call.
// NOTE(review): no locking is visible here, so the pool is presumably populated from a single goroutine - confirm.
type oidcDiscovererPool struct {
// ds maps issuer to its discoverer.
ds map[string]*oidcDiscoverer
// context is passed to all the discoverers; it is canceled by cancel in stopDiscovery.
context context.Context
cancel func()
// wg tracks goroutines started by startDiscovery.
wg *sync.WaitGroup
}
// createOrAdd registers vp at the discoverer for the given issuer.
//
// The discoverer is created on the first registration for the issuer,
// so multiple verifier pool pointers with the same issuer share a single discoverer.
func (dp *oidcDiscovererPool) createOrAdd(issuer string, vp *atomic.Pointer[jwt.VerifierPool]) {
	// Initialize the pool lazily on the first call.
	if dp.ds == nil {
		dp.ds = make(map[string]*oidcDiscoverer)
		dp.context, dp.cancel = context.WithCancel(context.Background())
		dp.wg = &sync.WaitGroup{}
	}

	d := dp.ds[issuer]
	if d == nil {
		d = &oidcDiscoverer{issuer: issuer}
		dp.ds[issuer] = d
	}
	d.vps = append(d.vps, vp)
}
// startDiscovery performs the initial refresh for all the registered issuers
// and then starts a background refresh goroutine per issuer.
//
// It is a no-op if no issuers were registered via createOrAdd.
// A failed initial refresh is only logged; the background goroutine is started anyway.
func (dp *oidcDiscovererPool) startDiscovery() {
if len(dp.ds) == 0 {
return
}
// Run the initial refresh for all the issuers concurrently and wait until every refresh is finished.
for _, d := range dp.ds {
dp.wg.Go(func() {
if err := d.refreshVerifierPools(dp.context); err != nil {
logger.Errorf("failed to initialize OIDC verifier pool at start for issuer %q: %s", d.issuer, err)
}
})
}
dp.wg.Wait()
// Start periodic refresh; these goroutines run until dp.context is canceled in stopDiscovery.
for _, d := range dp.ds {
dp.wg.Go(func() {
d.run(dp.context)
})
}
}
// stopDiscovery cancels the shared context and waits until all the goroutines tracked by dp.wg are finished.
//
// It is a no-op if no issuers were registered via createOrAdd; dp.cancel and dp.wg are nil in this case.
func (dp *oidcDiscovererPool) stopDiscovery() {
if len(dp.ds) == 0 {
return
}
dp.cancel()
dp.wg.Wait()
}
// oidcDiscoverer refreshes verifier pools for a single OIDC issuer.
type oidcDiscoverer struct {
// issuer is used for building the discovery URL and is compared to the issuer returned in the discovered config.
issuer string
// vps contains pointers, which receive the freshly fetched verifier pool on every successful refresh.
vps []*atomic.Pointer[jwt.VerifierPool]
}
// run periodically refreshes verifier pools for d.issuer until ctx is canceled.
//
// A failed refresh is retried after a short jittered interval,
// while the next refresh after a successful one is scheduled after a longer jittered interval.
func (d *oidcDiscoverer) run(ctx context.Context) {
	const (
		retryInterval   = time.Second * 10
		refreshInterval = time.Minute * 5
	)

	timer := time.NewTimer(timeutil.AddJitterToDuration(retryInterval))
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
		}

		err := d.refreshVerifierPools(ctx)
		switch {
		case errors.Is(err, context.Canceled):
			return
		case err != nil:
			timer.Reset(timeutil.AddJitterToDuration(retryInterval))
			logger.Errorf("failed to refresh OIDC verifier pool for issuer %q: %v", d.issuer, err)
		default:
			// OIDC may return Cache-Control header with max-age directive.
			// It could be used as time range for next refresh.
			// https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys
			timer.Reset(timeutil.AddJitterToDuration(refreshInterval))
		}
	}
}
// refreshVerifierPools fetches the OpenID configuration for d.issuer, downloads JWKs from the advertised jwks_uri
// and stores the resulting verifier pool into every pointer at d.vps.
//
// The previously stored verifier pools are left untouched on error.
func (d *oidcDiscoverer) refreshVerifierPools(ctx context.Context) error {
cfg, err := getOpenIDConfiguration(ctx, d.issuer)
if err != nil {
return err
}
// The issuer in the OIDC configuration must match the expected issuer.
// See https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationValidation
if cfg.Issuer != d.issuer {
return fmt.Errorf("openid configuration issuer %q does not match expected issuer %q", cfg.Issuer, d.issuer)
}
verifierPool, err := fetchAndParseJWKs(ctx, cfg.JWKsURI)
if err != nil {
return err
}
// All the pointers share the same verifier pool instance.
for _, vp := range d.vps {
vp.Store(verifierPool)
}
return nil
}
// openidConfig contains the subset of OpenID provider metadata used by vmauth.
//
// See https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata for details.
type openidConfig struct {
// Issuer is the value of the `issuer` field; it is compared to the expected issuer in refreshVerifierPools.
Issuer string `json:"issuer"`
// JWKsURI is the value of the `jwks_uri` field; JWKs are fetched from this URL.
JWKsURI string `json:"jwks_uri"`
}
// oidcHTTPClient is used for fetching the OpenID configuration and JWKs.
//
// Every request is limited by a 5 second timeout.
var oidcHTTPClient = &http.Client{
Timeout: time.Second * 5,
}
// fetchAndParseJWKs downloads JWKs from jwksURI via oidcHTTPClient and parses them into a verifier pool.
//
// It returns an error if the request fails, if the response status code isn't 200 OK
// or if the response body cannot be read or parsed. The underlying errors are wrapped,
// so the caller can inspect them with errors.Is and errors.As.
func fetchAndParseJWKs(ctx context.Context, jwksURI string) (*jwt.VerifierPool, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, jwksURI, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request for fetching jwks keys from %q: %w", jwksURI, err)
	}

	resp, err := oidcHTTPClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch jwks keys from %q: %w", jwksURI, err)
	}
	// The error from Close is intentionally ignored, since the response has been already processed at this point.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code %d when fetching jwks keys from %q", resp.StatusCode, jwksURI)
	}

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body from %q: %w", jwksURI, err)
	}

	vp, err := jwt.ParseJWKs(b)
	if err != nil {
		// Wrap the error with %w like the other errors in this function.
		return nil, fmt.Errorf("failed to parse jwks keys from %q: %w", jwksURI, err)
	}
	return vp, nil
}
// getOpenIDConfiguration fetches and decodes the OpenID provider configuration for the given issuer
// from the {issuer}/.well-known/openid-configuration URL via oidcHTTPClient.
//
// A single trailing slash at issuer is dropped before building the URL.
// It returns an error if the request fails, if the response status code isn't 200 OK
// or if the response body cannot be decoded as JSON. The underlying errors are wrapped,
// so the caller can detect the canceled context with errors.Is even if the cancellation
// happens while the response body is being read.
func getOpenIDConfiguration(ctx context.Context, issuer string) (openidConfig, error) {
	configURL := fmt.Sprintf("%s/.well-known/openid-configuration", strings.TrimSuffix(issuer, "/"))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, configURL, nil)
	if err != nil {
		return openidConfig{}, fmt.Errorf("failed to create request for fetching openid config from %q: %w", configURL, err)
	}

	resp, err := oidcHTTPClient.Do(req)
	if err != nil {
		return openidConfig{}, fmt.Errorf("failed to fetch openid config from %q: %w", configURL, err)
	}
	// The error from Close is intentionally ignored, since the response has been already processed at this point.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		return openidConfig{}, fmt.Errorf("unexpected status code %d when fetching openid config from %q", resp.StatusCode, configURL)
	}

	var cfg openidConfig
	if err := json.NewDecoder(resp.Body).Decode(&cfg); err != nil {
		// Use %w, since the decoder reads from the network and may fail with the context error.
		return openidConfig{}, fmt.Errorf("failed to decode openid config from %q: %w", configURL, err)
	}
	return cfg, nil
}

View File

@@ -174,7 +174,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
RetryStatusCodes: []int{503, 501},
LoadBalancingPolicy: "first_available",
DropSrcPathPrefixParts: intp(2),
DropSrcPathPrefixParts: new(2),
}, "/a/b/c", "http://foo.bar/c", `bb: aaa`, `x: y`, []int{503, 501}, "first_available", 2)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar/federate"),
@@ -219,13 +219,13 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
RetryStatusCodes: []int{503, 500, 501},
LoadBalancingPolicy: "first_available",
DropSrcPathPrefixParts: intp(1),
DropSrcPathPrefixParts: new(1),
},
{
SrcPaths: getRegexs([]string{"/api/v1/write"}),
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: intp(0),
DropSrcPathPrefixParts: new(0),
},
{
SrcPaths: getRegexs([]string{"/metrics"}),
@@ -242,7 +242,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: intp(2),
DropSrcPathPrefixParts: new(2),
}
f(ui, "http://host42/vmsingle/api/v1/query?query=up&db=foo", "http://vmselect/0/prometheus/api/v1/query?db=foo&query=up",
"xx: aa\nyy: asdf", "qwe: rty", []int{503, 500, 501}, "first_available", 1)
@@ -259,7 +259,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
SrcPaths: getRegexs([]string{"/api/v1/write"}),
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: intp(0),
DropSrcPathPrefixParts: new(0),
},
{
SrcPaths: getRegexs([]string{"/metrics/a/b"}),
@@ -275,7 +275,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: intp(2),
DropSrcPathPrefixParts: new(2),
}
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
f(ui, "https://foo-host/metrics/a/b", "http://metrics-server/b", "", "", []int{502}, "least_loaded", 2)

View File

@@ -21,6 +21,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/snapshot"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/snapshot/snapshotutil"
)

View File

@@ -7,6 +7,8 @@ import (
"math"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -45,7 +47,7 @@ func New(retries int, factor float64, minDuration time.Duration) (*Backoff, erro
// Retry process retries until all attempts are completed
func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
var attempt uint64
for i := 0; i < b.retries; i++ {
for i := range b.retries {
err := cb()
if err == nil {
return attempt, nil
@@ -55,6 +57,7 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
return attempt, err // fail fast if not recoverable
}
attempt++
retriesTotal.Inc()
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
dur := time.Duration(backoff)
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
@@ -74,3 +77,7 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
}
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
}
var (
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
)

View File

@@ -14,6 +14,12 @@ const (
globalSilent = "s"
globalVerbose = "verbose"
globalDisableProgressBar = "disable-progress-bar"
globalPushMetricsURL = "pushmetrics.url"
globalPushMetricsInterval = "pushmetrics.interval"
globalPushExtraLabels = "pushmetrics.extraLabel"
globalPushHeaders = "pushmetrics.header"
globalPushDisableCompression = "pushmetrics.disableCompression"
)
var (
@@ -33,6 +39,29 @@ var (
Value: false,
Usage: "Whether to disable progress bar during the import.",
},
&cli.StringSliceFlag{
Name: globalPushMetricsURL,
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
},
&cli.DurationFlag{
Name: globalPushMetricsInterval,
Value: 10 * time.Second,
Usage: "Interval for pushing metrics to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushExtraLabels,
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
"Flag can be set multiple times, to add few additional labels. " +
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushHeaders,
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
},
&cli.BoolFlag{
Name: globalPushDisableCompression,
Usage: "Whether to disable compression when pushing metrics.",
},
}
)
@@ -123,32 +152,32 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
"will have priority. Flag can be set multiple times, to add few additional labels.",
" will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
Usage: "Optional data transfer rate limit in bytes per second.\n" +
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vmAddr' destination.",
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vm-addr' destination.",
},
&cli.StringFlag{
Name: vmCertFile,
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vmAddr'",
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'",
},
&cli.StringFlag{
Name: vmKeyFile,
Usage: "Optional path to client-side TLS key to use when connecting to '--vmAddr'",
Usage: "Optional path to client-side TLS key to use when connecting to '--vm-addr'",
},
&cli.StringFlag{
Name: vmCAFile,
Usage: "Optional path to TLS CA file to use for verifying connections to '--vmAddr'. By default, system CA is used",
Usage: "Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used",
},
&cli.StringFlag{
Name: vmServerName,
Usage: "Optional TLS server name to use for connections to '--vmAddr'. By default, the server name from '--vmAddr' is used",
Usage: "Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used",
},
&cli.BoolFlag{
Name: vmInsecureSkipVerify,
Usage: "Whether to skip tls verification when connecting to '--vmAddr'",
Usage: "Whether to skip tls verification when connecting to '--vm-addr'",
Value: false,
},
&cli.IntFlag{
@@ -387,6 +416,16 @@ const (
promTemporaryDirPath = "prom-tmp-dir-path"
)
// Names of the command-line flags that configure the thanos migration mode.
const (
thanosSnapshot = "thanos-snapshot"
thanosConcurrency = "thanos-concurrency"
thanosFilterTimeStart = "thanos-filter-time-start"
thanosFilterTimeEnd = "thanos-filter-time-end"
thanosFilterLabel = "thanos-filter-label"
thanosFilterLabelValue = "thanos-filter-label-value"
thanosAggrTypes = "thanos-aggr-types"
)
var (
promFlags = []cli.Flag{
&cli.StringFlag{
@@ -422,6 +461,43 @@ var (
Value: os.TempDir(),
},
}
// thanosFlags lists the flags specific to the "thanos" command.
// Only the snapshot path is required; the remaining flags are optional.
thanosFlags = []cli.Flag{
&cli.StringFlag{
Name: thanosSnapshot,
Usage: "Path to Thanos snapshot directory containing raw and/or downsampled blocks.",
Required: true,
},
&cli.IntFlag{
Name: thanosConcurrency,
Usage: "Number of concurrently running snapshot readers",
Value: 1,
},
&cli.StringFlag{
Name: thanosFilterTimeStart,
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
},
&cli.StringFlag{
Name: thanosFilterTimeEnd,
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
},
&cli.StringFlag{
Name: thanosFilterLabel,
Usage: "Thanos label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
},
&cli.StringFlag{
Name: thanosFilterLabelValue,
Usage: fmt.Sprintf("Thanos regular expression to filter label from %q flag.", thanosFilterLabel),
// The default pattern matches any label value.
Value: ".*",
},
&cli.StringSliceFlag{
Name: thanosAggrTypes,
Usage: "Aggregate types to import from Thanos downsampled blocks. Supported values: count, sum, min, max, counter. " +
"Each aggregate will be imported as a separate metric with the aggregate type as suffix (e.g., metric_name:5m:count). " +
"If not specified, all aggregate types will be imported from downsampled blocks.",
Value: nil,
},
}
)
const (
@@ -598,7 +674,7 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
"will have priority. Flag can be set multiple times, to add few additional labels.",
" will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
@@ -625,8 +701,8 @@ var (
&cli.BoolFlag{
Name: vmNativeDisableBinaryProtocol,
Usage: "Whether to use https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-json-line-format " +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API." +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API. " +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks. " +
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
Value: false,
},

View File

@@ -7,6 +7,8 @@ import (
"log"
"sync"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
@@ -52,6 +54,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
return nil
}
influxSeriesTotal.Add(len(series))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing series"), len(series))
if err := barpool.Start(); err != nil {
return err
@@ -63,18 +66,18 @@ func (ip *influxProcessor) run(ctx context.Context) error {
ip.im.ResetStats()
var wg sync.WaitGroup
wg.Add(ip.cc)
for i := 0; i < ip.cc; i++ {
go func() {
defer wg.Done()
for range ip.cc {
wg.Go(func() {
for s := range seriesCh {
if err := ip.do(s); err != nil {
influxErrorsTotal.Inc()
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
return
}
influxSeriesProcessed.Inc()
bar.Increment()
}
}()
})
}
// any error breaks the import
@@ -83,6 +86,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
case infErr := <-errCh:
return fmt.Errorf("influx error: %s", infErr)
case vmErr := <-ip.im.Errors():
influxErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
case seriesCh <- s:
}
@@ -95,6 +99,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
// drain import errors channel
for vmErr := range ip.im.Errors() {
if vmErr.Err != nil {
influxErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
}
}
@@ -169,3 +174,9 @@ func (ip *influxProcessor) do(s *influx.Series) error {
}
}
}
// Progress metrics for the influx migration mode.
var (
// influxSeriesTotal is increased by the number of discovered series before processing starts.
influxSeriesTotal = metrics.NewCounter(`vmctl_influx_migration_series_total`)
// influxSeriesProcessed is incremented after each series is processed without error.
influxSeriesProcessed = metrics.NewCounter(`vmctl_influx_migration_series_processed`)
// influxErrorsTotal is incremented on every series failure and on every reported import error.
influxErrorsTotal = metrics.NewCounter(`vmctl_influx_migration_errors_total`)
)

View File

@@ -4,6 +4,8 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
)
@@ -45,9 +47,16 @@ func (l *Limiter) Register(dataLen int) {
t := timerpool.Get(d)
<-t.C
timerpool.Put(t)
limiterThrottleEventsTotal.Inc()
}
l.budget += limit
l.deadline = time.Now().Add(time.Second)
}
l.budget -= int64(dataLen)
limiterBytesProcessed.Add(dataLen)
}
// Metrics exposed by the rate limiter.
var (
// limiterBytesProcessed is increased by dataLen on every Limiter.Register call.
limiterBytesProcessed = metrics.NewCounter(`vmctl_limiter_bytes_processed_total`)
// limiterThrottleEventsTotal is incremented each time Register has waited on the throttling timer.
limiterThrottleEventsTotal = metrics.NewCounter(`vmctl_limiter_throttle_events_total`)
)

View File

@@ -2,6 +2,7 @@ package main
import (
"context"
"flag"
"fmt"
"log"
"net/http"
@@ -19,11 +20,14 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/thanos"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
@@ -41,11 +45,20 @@ func main() {
ctx, cancelCtx := context.WithCancel(context.Background())
start := time.Now()
beforeFn := func(c *cli.Context) error {
flag.Parse()
logger.Init()
isSilent = c.Bool(globalSilent)
if c.Bool(globalDisableProgressBar) {
barpool.Disable(true)
}
netutil.EnableIPv6()
pushmetrics.InitWith(&pushmetrics.Config{
URLs: c.StringSlice(globalPushMetricsURL),
Interval: c.Duration(globalPushMetricsInterval),
ExtraLabels: c.StringSlice(globalPushExtraLabels),
DisableCompression: c.Bool(globalPushDisableCompression),
Headers: c.StringSlice(globalPushHeaders),
})
return nil
}
app := &cli.App{
@@ -273,6 +286,7 @@ func main() {
if err != nil {
return fmt.Errorf("failed to create prometheus client: %s", err)
}
pp := prometheusProcessor{
cl: cl,
im: importer,
@@ -282,6 +296,59 @@ func main() {
return pp.run(ctx)
},
},
{
Name: "thanos",
Usage: "Migrate time series from Thanos blocks (supports raw and downsampled data)",
Flags: mergeFlags(globalFlags, thanosFlags, vmFlags),
Before: beforeFn,
Action: func(c *cli.Context) error {
fmt.Println("Thanos import mode")
vmCfg, err := initConfigVM(c)
if err != nil {
return fmt.Errorf("failed to init VM configuration: %s", err)
}
importer, err = vm.NewImporter(ctx, vmCfg)
if err != nil {
return fmt.Errorf("failed to create VM importer: %s", err)
}
thanosCfg := thanos.Config{
Snapshot: c.String(thanosSnapshot),
Filter: thanos.Filter{
TimeMin: c.String(thanosFilterTimeStart),
TimeMax: c.String(thanosFilterTimeEnd),
Label: c.String(thanosFilterLabel),
LabelValue: c.String(thanosFilterLabelValue),
},
}
cl, err := thanos.NewClient(thanosCfg)
if err != nil {
return fmt.Errorf("failed to create thanos client: %s", err)
}
var aggrTypes []thanos.AggrType
if aggrTypesStr := c.StringSlice(thanosAggrTypes); len(aggrTypesStr) > 0 {
for _, typeStr := range aggrTypesStr {
aggrType, err := thanos.ParseAggrType(typeStr)
if err != nil {
return fmt.Errorf("failed to parse aggregate type %q: %s", typeStr, err)
}
aggrTypes = append(aggrTypes, aggrType)
}
}
tp := thanosProcessor{
cl: cl,
im: importer,
cc: c.Int(thanosConcurrency),
isVerbose: c.Bool(globalVerbose),
aggrTypes: aggrTypes,
}
return tp.run(ctx)
},
},
{
Name: "vm-native",
Usage: "Migrate time series between VictoriaMetrics installations",
@@ -451,6 +518,7 @@ func main() {
log.Fatalln(err)
}
log.Printf("Total time: %v", time.Since(start))
pushmetrics.StopAndPush()
}
func initConfigVM(c *cli.Context) (vm.Config, error) {

View File

@@ -8,6 +8,8 @@ import (
"net/http"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/auth"
)
@@ -36,12 +38,15 @@ type Response struct {
// Explore finds metric names by provided filter from api/v1/label/__name__/values
func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start, end time.Time) ([]string, error) {
startTime := time.Now()
exploreRequestsTotal.Inc()
url := fmt.Sprintf("%s/%s", c.Addr, nativeMetricNamesAddr)
if tenantID != "" {
url = fmt.Sprintf("%s/select/%s/prometheus/%s", c.Addr, tenantID, nativeMetricNamesAddr)
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
exploreRequestsErrorsTotal.Inc()
return nil, fmt.Errorf("cannot create request to %q: %s", url, err)
}
@@ -53,37 +58,53 @@ func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start,
resp, err := c.do(req, http.StatusOK)
if err != nil {
exploreRequestsErrorsTotal.Inc()
exploreDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("series request failed: %s", err)
}
var response Response
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
exploreRequestsErrorsTotal.Inc()
exploreDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("cannot decode series response: %s", err)
}
exploreDuration.UpdateDuration(startTime)
return response.MetricNames, resp.Body.Close()
}
// ImportPipe sends a POST request to dstURL whose body is streamed from pr.
//
// http.StatusNoContent is passed to c.do as the expected status code.
// Every call increments the import request counter; every failure increments
// the import error counter. The request duration is recorded for all outcomes
// except a failure to build the request, in which case nothing was sent.
func (c *Client) ImportPipe(ctx context.Context, dstURL string, pr *io.PipeReader) error {
	begin := time.Now()
	importRequestsTotal.Inc()

	request, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, dstURL, pr)
	if reqErr != nil {
		importRequestsErrorsTotal.Inc()
		return fmt.Errorf("cannot create import request to %q: %s", c.Addr, reqErr)
	}

	// From this point on every exit path records the elapsed time.
	defer importDuration.UpdateDuration(begin)

	response, doErr := c.do(request, http.StatusNoContent)
	if doErr != nil {
		importRequestsErrorsTotal.Inc()
		return fmt.Errorf("import request failed: %s", doErr)
	}

	if closeErr := response.Body.Close(); closeErr != nil {
		importRequestsErrorsTotal.Inc()
		return fmt.Errorf("cannot close import response body: %s", closeErr)
	}
	return nil
}
// ExportPipe makes request by provided filter and return io.ReadCloser which can be used to get data
func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadCloser, error) {
startTime := time.Now()
exportRequestsTotal.Inc()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
exportRequestsErrorsTotal.Inc()
return nil, fmt.Errorf("cannot create request to %q: %s", c.Addr, err)
}
@@ -102,8 +123,11 @@ func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadC
resp, err := c.do(req, http.StatusOK)
if err != nil {
exportRequestsErrorsTotal.Inc()
exportDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("export request failed: %w", err)
}
exportDuration.UpdateDuration(startTime)
return resp.Body, nil
}
@@ -162,3 +186,16 @@ func (c *Client) do(req *http.Request, expSC int) (*http.Response, error) {
}
return resp, err
}
// Metrics for the vm-native client, split by request type (import, export, explore).
var (
// Request counters are incremented at the start of ImportPipe, ExportPipe and Explore respectively.
importRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="import"}`)
exportRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="export"}`)
exploreRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="explore"}`)
// Error counters are incremented on every failed step of the corresponding request.
importRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="import"}`)
exportRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="export"}`)
exploreRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="explore"}`)
// Duration histograms are updated once the request has been attempted;
// they are not updated when the request could not even be created.
importDuration = metrics.NewHistogram(`vmctl_vm_native_import_duration_seconds`)
exportDuration = metrics.NewHistogram(`vmctl_vm_native_export_duration_seconds`)
exploreDuration = metrics.NewHistogram(`vmctl_vm_native_explore_duration_seconds`)
)

View File

@@ -7,9 +7,11 @@ import (
"sync"
"time"
vmetrics "github.com/VictoriaMetrics/metrics"
"github.com/cheggaaa/pb/v3"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/cheggaaa/pb/v3"
)
type otsdbProcessor struct {
@@ -57,6 +59,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
if !prompt(ctx, question) {
return nil
}
op.im.ResetStats()
var startTime int64
if op.oc.HardTS != 0 {
@@ -84,63 +87,44 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
seriesCh := make(chan queryObj, op.otsdbcc)
errCh := make(chan error)
// we're going to make serieslist * queryRanges queries, so we should represent that in the progress bar
otsdbSeriesTotal.Add(len(serieslist) * queryRanges)
bar := pb.StartNew(len(serieslist) * queryRanges)
defer func(bar *pb.ProgressBar) {
bar.Finish()
}(bar)
var wg sync.WaitGroup
wg.Add(op.otsdbcc)
for i := 0; i < op.otsdbcc; i++ {
go func() {
defer wg.Done()
for range op.otsdbcc {
wg.Go(func() {
for s := range seriesCh {
if err := op.do(s); err != nil {
otsdbErrorsTotal.Inc()
errCh <- fmt.Errorf("couldn't retrieve series for %s : %s", metric, err)
return
}
otsdbSeriesProcessed.Inc()
bar.Increment()
}
}()
})
}
/*
Loop through all series for this metric, processing all retentions and time ranges
requested. This loop is our primary "collect data from OpenTSDB loop" and should
be async, sending data to VictoriaMetrics over time.
runErr := op.sendQueries(ctx, serieslist, seriesCh, errCh, startTime)
The idea with having the select at the inner-most loop is to ensure quick
short-circuiting on error.
*/
for _, series := range serieslist {
for _, rt := range op.oc.Retentions {
for _, tr := range rt.QueryRanges {
select {
case otsdbErr := <-errCh:
return fmt.Errorf("opentsdb error: %s", otsdbErr)
case vmErr := <-op.im.Errors():
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
case seriesCh <- queryObj{
Tr: tr, StartTime: startTime,
Series: series, Rt: opentsdb.RetentionMeta{
FirstOrder: rt.FirstOrder, SecondOrder: rt.SecondOrder, AggTime: rt.AggTime}}:
}
}
}
}
// Drain channels per metric
// Always drain channels and wait for workers to prevent goroutine leaks
close(seriesCh)
wg.Wait()
close(errCh)
// check for any lingering errors on the query side
for otsdbErr := range errCh {
return fmt.Errorf("import process failed: \n%s", otsdbErr)
if runErr == nil {
runErr = fmt.Errorf("import process failed: \n%s", otsdbErr)
}
}
bar.Finish()
if runErr != nil {
return runErr
}
log.Print(op.im.Stats())
}
op.im.Close()
for vmErr := range op.im.Errors() {
if vmErr.Err != nil {
otsdbErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
}
}
@@ -149,6 +133,34 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
return nil
}
// sendQueries hands one queryObj per (series, retention, query range)
// combination to the workers via seriesCh.
//
// It returns a non-nil error as soon as one of the following happens:
//   - ctx is canceled;
//   - a worker reports a failure on errCh;
//   - the importer reports a failure on op.im.Errors().
//
// It returns nil once every query has been handed to a worker.
//
// Metric accounting: workers increment otsdbErrorsTotal themselves before
// writing to errCh, so errors read from errCh are not counted again here.
// Importer errors are counted here, because nothing else counts them on
// this path.
func (op *otsdbProcessor) sendQueries(ctx context.Context, serieslist []opentsdb.Meta, seriesCh chan<- queryObj, errCh <-chan error, startTime int64) error {
	for _, series := range serieslist {
		for _, rt := range op.oc.Retentions {
			// The retention metadata is identical for all query ranges of rt.
			rtMeta := opentsdb.RetentionMeta{
				FirstOrder:  rt.FirstOrder,
				SecondOrder: rt.SecondOrder,
				AggTime:     rt.AggTime,
			}
			for _, tr := range rt.QueryRanges {
				query := queryObj{
					Tr:        tr,
					StartTime: startTime,
					Series:    series,
					Rt:        rtMeta,
				}
				// The select sits in the innermost loop so that an error or
				// cancellation short-circuits the iteration quickly.
				select {
				case <-ctx.Done():
					return fmt.Errorf("context canceled: %s", ctx.Err())
				case otsdbErr := <-errCh:
					// Already counted by the worker that produced it.
					return fmt.Errorf("opentsdb error: %s", otsdbErr)
				case vmErr := <-op.im.Errors():
					otsdbErrorsTotal.Inc()
					return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
				case seriesCh <- query:
				}
			}
		}
	}
	return nil
}
func (op *otsdbProcessor) do(s queryObj) error {
start := s.StartTime - s.Tr.Start
end := s.StartTime - s.Tr.End
@@ -157,6 +169,7 @@ func (op *otsdbProcessor) do(s queryObj) error {
return fmt.Errorf("failed to collect data for %v in %v:%v :: %v", s.Series, s.Rt, s.Tr, err)
}
if len(data.Timestamps) < 1 || len(data.Values) < 1 {
log.Printf("no data found for %v in %v:%v...skipping", s.Series, s.Rt, s.Tr)
return nil
}
labels := make([]vm.LabelPair, 0, len(data.Tags))
@@ -171,3 +184,9 @@ func (op *otsdbProcessor) do(s queryObj) error {
}
return op.im.Input(&ts)
}
// Progress metrics for the opentsdb migration mode.
var (
// otsdbSeriesTotal is increased by the number of planned queries (series count multiplied by query ranges).
otsdbSeriesTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_total`)
// otsdbSeriesProcessed is incremented after each query is processed without error.
otsdbSeriesProcessed = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_processed`)
// otsdbErrorsTotal is incremented when a query or the import process fails.
otsdbErrorsTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_errors_total`)
)

View File

@@ -108,10 +108,10 @@ func (c Client) FindMetrics(q string) ([]string, error) {
if err != nil {
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("could not retrieve metric data from %q: %s", q, err)
@@ -130,12 +130,12 @@ func (c Client) FindSeries(metric string) ([]Meta, error) {
q := fmt.Sprintf("%s/api/search/lookup?m=%s&limit=%d", c.Addr, metric, c.Limit)
resp, err := c.c.Get(q)
if err != nil {
return nil, fmt.Errorf("failed to set GET request to %q: %s", q, err)
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("could not retrieve series data from %q: %s", q, err)
@@ -185,6 +185,7 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
if err != nil {
return Metric{}, fmt.Errorf("failed to send GET request to %q: %s", q, err)
}
defer func() { _ = resp.Body.Close() }()
/*
There are three potential failures here, none of which should kill the entire
migration run:
@@ -196,7 +197,6 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
log.Printf("bad response code from OpenTSDB query %v for %q...skipping", resp.StatusCode, q)
return Metric{}, nil
}
defer func() { _ = resp.Body.Close() }()
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Println("couldn't read response body from OpenTSDB query...skipping")
@@ -239,27 +239,20 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
In all "bad" cases, we don't end the migration, we just don't process that particular message
*/
if len(output) < 1 {
// no results returned...return an empty object without error
return Metric{}, nil
}
if len(output) > 1 {
// multiple series returned for a single query. We can't process this right, so...
return Metric{}, nil
return Metric{}, fmt.Errorf("unexpected number of series returned: %d for query %q; expected 1", len(output), q)
}
if len(output[0].AggregateTags) > 0 {
// This failure means we've suppressed potential series somehow...
return Metric{}, nil
return Metric{}, fmt.Errorf("aggregate tags %v present in response for query %q; series may be suppressed", output[0].AggregateTags, q)
}
data := Metric{}
data.Metric = output[0].Metric
data.Tags = output[0].Tags
/*
We evaluate data for correctness before formatting the actual values
to skip a little bit of time if the series has invalid formatting
*/
data, err = modifyData(data, c.Normalize)
if err != nil {
return Metric{}, nil
return Metric{}, fmt.Errorf("failed to convert metric data for query %q: %w", q, err)
}
/*

View File

@@ -32,7 +32,7 @@ func convertDuration(duration string) (time.Duration, error) {
var err error
var timeValue int
if strings.HasSuffix(duration, "y") {
timeValue, err = strconv.Atoi(strings.Trim(duration, "y"))
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "y"))
if err != nil {
return 0, fmt.Errorf("invalid time range: %q", duration)
}
@@ -42,7 +42,7 @@ func convertDuration(duration string) (time.Duration, error) {
return 0, fmt.Errorf("invalid time range: %q", duration)
}
} else if strings.HasSuffix(duration, "w") {
timeValue, err = strconv.Atoi(strings.Trim(duration, "w"))
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "w"))
if err != nil {
return 0, fmt.Errorf("invalid time range: %q", duration)
}
@@ -52,7 +52,7 @@ func convertDuration(duration string) (time.Duration, error) {
return 0, fmt.Errorf("invalid time range: %q", duration)
}
} else if strings.HasSuffix(duration, "d") {
timeValue, err = strconv.Atoi(strings.Trim(duration, "d"))
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "d"))
if err != nil {
return 0, fmt.Errorf("invalid time range: %q", duration)
}
@@ -95,6 +95,9 @@ func convertRetention(retention string, offset int64, msecTime bool) (Retention,
if !msecTime {
queryLength = queryLength / 1000
}
if queryLength <= 0 {
return Retention{}, fmt.Errorf("ttl %q resolves to non-positive query range %d; use a larger duration", chunks[2], queryLength)
}
queryRange := queryLength
// bump by the offset so we don't look at empty ranges any time offset > ttl
queryLength += offset
@@ -138,16 +141,29 @@ func convertRetention(retention string, offset int64, msecTime bool) (Retention,
2. we discover the actual size of each "chunk"
This is second division step
*/
querySize = int64(queryRange / (queryRange / (rowLength * 4)))
divisor := queryRange / (rowLength * 4)
if divisor == 0 {
querySize = queryRange
} else {
querySize = queryRange / divisor
}
} else {
/*
Unless the aggTime (how long a range of data we're requesting per individual point)
is greater than the row size. Then we'll need to use that to determine
how big each individual query should be
*/
querySize = int64(queryRange / (queryRange / (aggTime * 4)))
divisor := queryRange / (aggTime * 4)
if divisor == 0 {
querySize = queryRange
} else {
querySize = queryRange / divisor
}
}
if querySize <= 0 {
return Retention{}, fmt.Errorf("computed non-positive querySize=%d for retention %q; check parameters", querySize, retention)
}
var timeChunks []TimeRange
var i int64
for i = offset; i <= queryLength; i = i + querySize {

View File

@@ -11,6 +11,8 @@ import (
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
@@ -113,6 +115,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
}
func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
promBlocksTotal.Add(len(blocks))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing blocks"), len(blocks))
if err := barpool.Start(); err != nil {
return err
@@ -124,18 +127,18 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
pp.im.ResetStats()
var wg sync.WaitGroup
wg.Add(pp.cc)
for i := 0; i < pp.cc; i++ {
go func() {
defer wg.Done()
for range pp.cc {
wg.Go(func() {
for br := range blockReadersCh {
if err := pp.do(br); err != nil {
promErrorsTotal.Inc()
errCh <- fmt.Errorf("read failed for block %q: %s", br.Meta().ULID, err)
return
}
promBlocksProcessed.Inc()
bar.Increment()
}
}()
})
}
// any error breaks the import
for _, br := range blocks {
@@ -145,6 +148,7 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
return fmt.Errorf("prometheus error: %s", promErr)
case vmErr := <-pp.im.Errors():
close(blockReadersCh)
promErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
case blockReadersCh <- br:
}
@@ -158,6 +162,7 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
// drain import errors channel
for vmErr := range pp.im.Errors() {
if vmErr.Err != nil {
promErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
}
}
@@ -167,3 +172,9 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
return nil
}
// Progress metrics for the prometheus migration mode.
var (
// promBlocksTotal is increased by the number of blocks passed to processBlocks.
promBlocksTotal = metrics.NewCounter(`vmctl_prometheus_migration_blocks_total`)
// promBlocksProcessed is incremented after each block is processed without error.
promBlocksProcessed = metrics.NewCounter(`vmctl_prometheus_migration_blocks_processed`)
// promErrorsTotal is incremented when reading a block or the import process fails.
promErrorsTotal = metrics.NewCounter(`vmctl_prometheus_migration_errors_total`)
)

View File

@@ -7,6 +7,8 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
@@ -51,6 +53,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
return nil
}
remoteReadRangesTotal.Add(len(ranges))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing ranges"), len(ranges))
if err := barpool.Start(); err != nil {
return err
@@ -66,18 +69,18 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
errCh := make(chan error)
var wg sync.WaitGroup
wg.Add(rrp.cc)
for i := 0; i < rrp.cc; i++ {
go func() {
defer wg.Done()
for range rrp.cc {
wg.Go(func() {
for r := range rangeC {
if err := rrp.do(ctx, r); err != nil {
remoteReadErrorsTotal.Inc()
errCh <- fmt.Errorf("request failed for: %s", err)
return
}
remoteReadRangesProcessed.Inc()
bar.Increment()
}
}()
})
}
for _, r := range ranges {
@@ -85,6 +88,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
case infErr := <-errCh:
return fmt.Errorf("remote read error: %s", infErr)
case vmErr := <-rrp.dst.Errors():
remoteReadErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
case rangeC <- &remoteread.Filter{
StartTimestampMs: r[0].UnixMilli(),
@@ -100,6 +104,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
// drain import errors channel
for vmErr := range rrp.dst.Errors() {
if vmErr.Err != nil {
remoteReadErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
}
}
@@ -120,3 +125,9 @@ func (rrp *remoteReadProcessor) do(ctx context.Context, filter *remoteread.Filte
return nil
})
}
// Progress metrics for the remote-read migration mode.
var (
// remoteReadRangesTotal is increased by the number of time ranges to process.
remoteReadRangesTotal = metrics.NewCounter(`vmctl_remote_read_migration_ranges_total`)
// remoteReadRangesProcessed is incremented after each range is processed without error.
remoteReadRangesProcessed = metrics.NewCounter(`vmctl_remote_read_migration_ranges_processed`)
// remoteReadErrorsTotal is incremented when a range request or the import process fails.
remoteReadErrorsTotal = metrics.NewCounter(`vmctl_remote_read_migration_errors_total`)
)

View File

@@ -0,0 +1,233 @@
package thanos
import (
"encoding/binary"
"errors"
"fmt"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
// ChunkEncAggr is the top-level chunk encoding byte that identifies an AggrChunk.
// Thanos defines it as 0xff to prevent collisions with Prometheus encodings.
const ChunkEncAggr = chunkenc.Encoding(0xff)
// AggrType identifies one of the aggregations stored in a Thanos downsampled block.
type AggrType uint8

// AggrTypeNone marks raw (non-downsampled) data, where no aggregation applies.
// It is a sentinel used to tell raw block processing apart from downsampled.
const AggrTypeNone AggrType = 255

// Supported aggregation types. The numeric values match the Thanos definitions.
const (
	AggrCount AggrType = iota
	AggrSum
	AggrMin
	AggrMax
	AggrCounter
)

// AllAggrTypes lists every supported aggregation type.
var AllAggrTypes = []AggrType{AggrCount, AggrSum, AggrMin, AggrMax, AggrCounter}

// aggrTypeNames holds the textual name of every supported aggregation type,
// indexed by the type's numeric value.
var aggrTypeNames = [...]string{
	AggrCount:   "count",
	AggrSum:     "sum",
	AggrMin:     "min",
	AggrMax:     "max",
	AggrCounter: "counter",
}

// String returns the lowercase name of t, or "<unknown>" when t is not one of
// the supported aggregation types (this includes AggrTypeNone).
func (t AggrType) String() string {
	if int(t) < len(aggrTypeNames) {
		return aggrTypeNames[t]
	}
	return "<unknown>"
}

// ParseAggrType converts the name of an aggregation type into its AggrType.
// The match is exact and case-sensitive; an unrecognized name yields an error.
func ParseAggrType(s string) (AggrType, error) {
	for idx, name := range aggrTypeNames {
		if name == s {
			return AggrType(idx), nil
		}
	}
	return 0, fmt.Errorf("unknown aggregate type: %q", s)
}
// ErrAggrNotExist reports that the requested aggregation is absent from an AggrChunk.
var ErrAggrNotExist = errors.New("aggregate does not exist")

// AggrChunk is the raw encoding of a chunk that bundles several aggregates
// computed over the same underlying data. Any of the aggregates may be absent.
// Only decoding is implemented here; the type is used for reading Thanos
// downsampled blocks.
type AggrChunk []byte

// IsAggrChunk reports whether enc is the encoding byte of an AggrChunk.
func IsAggrChunk(enc chunkenc.Encoding) bool {
	return enc == ChunkEncAggr
}

// Get decodes and returns the sub-chunk that holds aggregate t.
//
// The aggregates are stored back to back, ordered by AggrType. Each entry
// starts with a uvarint length l. A zero length means the aggregate is unset
// and no further bytes follow for it. Otherwise l+1 bytes follow: one encoding
// byte and l bytes of chunk data.
//
// ErrAggrNotExist is returned when aggregate t is unset. Any other error
// indicates malformed data.
func (c AggrChunk) Get(t AggrType) (chunkenc.Chunk, error) {
	rest := []byte(c)
	var sub []byte
	for cur := AggrType(0); cur <= t; cur++ {
		size, read := binary.Uvarint(rest)
		if read < 1 {
			return nil, errors.New("invalid size: failed to read uvarint")
		}
		rest = rest[read:]

		if size == 0 {
			// Explicit zero length: this aggregate is unset.
			if cur == t {
				return nil, ErrAggrNotExist
			}
			continue
		}
		// size+1 bytes must be available; comparing with >= also rejects
		// sizes for which size+1 would overflow.
		if size >= uint64(len(rest)) {
			return nil, errors.New("invalid size: not enough bytes")
		}
		end := int(size) + 1
		sub, rest = rest[:end], rest[end:]
	}
	if len(sub) == 0 {
		return nil, ErrAggrNotExist
	}
	return chunkenc.FromData(chunkenc.Encoding(sub[0]), sub[1:])
}

// Encoding returns ChunkEncAggr, the encoding byte of an AggrChunk.
func (c AggrChunk) Encoding() chunkenc.Encoding {
	return ChunkEncAggr
}
// errIterator is an iterator that delegates to an embedded iterator
// (a nop iterator at the only construction site in this file) while
// exposing a stored error through Err().
// Embedding chunkenc.Iterator inherits every other method, Seek included,
// which avoids the go vet stdmethods warning about the Seek signature.
type errIterator struct {
	chunkenc.Iterator
	err error
}

// Err returns the error stored in the iterator.
func (it *errIterator) Err() error {
	return it.err
}

// newAggrChunkIterator builds an iterator over the aggrType aggregate of the
// AggrChunk stored in data.
//
// When the aggregate is missing (ErrAggrNotExist) a plain nop iterator is
// returned, so the caller simply observes zero samples. Any other decoding
// error is surfaced through the Err() method of the returned iterator.
func newAggrChunkIterator(data []byte, aggrType AggrType) chunkenc.Iterator {
	sub, err := AggrChunk(data).Get(aggrType)
	switch {
	case err == nil:
		return sub.Iterator(nil)
	case errors.Is(err, ErrAggrNotExist):
		return chunkenc.NewNopIterator()
	default:
		return &errIterator{
			Iterator: chunkenc.NewNopIterator(),
			err:      err,
		}
	}
}
// AggrChunkWrapper adapts an AggrChunk to the chunkenc.Chunk interface.
// All reads are delegated to a single, fixed aggregate type.
type AggrChunkWrapper struct {
	data     []byte
	aggrType AggrType
}

// NewAggrChunkWrapper returns a wrapper around data that exposes the
// aggrType aggregate.
func NewAggrChunkWrapper(data []byte, aggrType AggrType) *AggrChunkWrapper {
	w := new(AggrChunkWrapper)
	w.data = data
	w.aggrType = aggrType
	return w
}

// Bytes returns the wrapped byte slice without copying it.
func (c *AggrChunkWrapper) Bytes() []byte {
	return c.data
}

// Encoding returns the AggrChunk encoding.
func (c *AggrChunkWrapper) Encoding() chunkenc.Encoding {
	return ChunkEncAggr
}

// Appender always fails, because AggrChunk is read-only.
func (c *AggrChunkWrapper) Appender() (chunkenc.Appender, error) {
	return nil, errors.New("AggrChunk is read-only")
}

// Iterator returns an iterator over the configured aggregate type.
// The passed iterator is not reused; a new one is built on every call.
func (c *AggrChunkWrapper) Iterator(it chunkenc.Iterator) chunkenc.Iterator {
	return newAggrChunkIterator(c.data, c.aggrType)
}

// NumSamples returns the number of samples in the configured aggregate.
// Zero is returned when the aggregate is missing or cannot be decoded.
func (c *AggrChunkWrapper) NumSamples() int {
	if sub, err := AggrChunk(c.data).Get(c.aggrType); err == nil {
		return sub.NumSamples()
	}
	return 0
}

// Compact does nothing, since AggrChunk is read-only.
func (c *AggrChunkWrapper) Compact() {}

// Reset replaces the wrapped data with stream. The aggregate type is kept.
func (c *AggrChunkWrapper) Reset(stream []byte) {
	c.data = stream
}
// AggrChunkPool is a chunk pool that is aware of the AggrChunk encoding (0xff).
// AggrChunk data is wrapped in an AggrChunkWrapper; every other encoding is
// forwarded to the default chunkenc pool.
type AggrChunkPool struct {
	defaultPool chunkenc.Pool
	aggrType    AggrType
}

// NewAggrChunkPool returns a pool whose AggrChunk wrappers expose the
// aggrType aggregate.
func NewAggrChunkPool(aggrType AggrType) *AggrChunkPool {
	p := new(AggrChunkPool)
	p.defaultPool = chunkenc.NewPool()
	p.aggrType = aggrType
	return p
}

// Get returns a chunk for the encoding e backed by the data b.
func (p *AggrChunkPool) Get(e chunkenc.Encoding, b []byte) (chunkenc.Chunk, error) {
	if e != ChunkEncAggr {
		return p.defaultPool.Get(e, b)
	}
	return NewAggrChunkWrapper(b, p.aggrType), nil
}

// Put hands the chunk c back to the pool.
// Chunks with the AggrChunk encoding are not pooled and are simply dropped.
func (p *AggrChunkPool) Put(c chunkenc.Chunk) error {
	if c.Encoding() != ChunkEncAggr {
		return p.defaultPool.Put(c)
	}
	return nil
}

View File

@@ -0,0 +1,110 @@
package thanos
import (
"encoding/json"
"os"
"path/filepath"
)
// BlockMeta describes the Thanos-specific part of a block's meta.json.
// It declares only the "thanos" section; it does not embed the Prometheus
// block metadata itself.
type BlockMeta struct {
// Thanos holds the metadata stored under the "thanos" key of meta.json.
Thanos ThanosMeta `json:"thanos,omitempty"`
}
// ThanosMeta contains Thanos-specific block metadata,
// as stored under the "thanos" key of meta.json.
type ThanosMeta struct {
// Labels are external labels identifying the producer.
Labels map[string]string `json:"labels,omitempty"`
// Downsample contains downsampling information (the block resolution).
Downsample ThanosDownsample `json:"downsample,omitempty"`
// Source indicates where the block came from.
Source string `json:"source,omitempty"`
// SegmentFiles contains the list of segment files in the block.
SegmentFiles []string `json:"segment_files,omitempty"`
// Files contains metadata about files in the block.
Files []ThanosFile `json:"files,omitempty"`
}
// ThanosDownsample contains the downsampling resolution of a block.
type ThanosDownsample struct {
// Resolution is the downsampling resolution in milliseconds.
// 0 means raw data (no downsampling).
// 300000 (5 minutes) or 3600000 (1 hour) for downsampled data.
// These values correspond to the ResolutionLevel constants.
Resolution int64 `json:"resolution"`
}
// ThanosFile contains metadata about a single file in the block.
type ThanosFile struct {
// RelPath is the file path as stored under "rel_path"
// (presumably relative to the block directory; confirm against Thanos).
RelPath string `json:"rel_path"`
// SizeBytes is the file size in bytes; it is omitted from JSON when zero.
SizeBytes int64 `json:"size_bytes,omitempty"`
}
// ResolutionLevel is a downsampling resolution expressed in milliseconds.
type ResolutionLevel int64

const (
	// ResolutionRaw denotes raw data that has not been downsampled.
	ResolutionRaw ResolutionLevel = 0
	// Resolution5m denotes data downsampled to 5 minutes (300000 ms).
	Resolution5m ResolutionLevel = 300000
	// Resolution1h denotes data downsampled to 1 hour (3600000 ms).
	Resolution1h ResolutionLevel = 3600000
)

// String returns a short human-readable name of the resolution:
// "raw", "5m" or "1h". Any other value yields "unknown".
func (r ResolutionLevel) String() string {
	if r == ResolutionRaw {
		return "raw"
	}
	if r == Resolution5m {
		return "5m"
	}
	if r == Resolution1h {
		return "1h"
	}
	return "unknown"
}
// ReadBlockMeta loads the meta.json file located in blockDir and decodes
// its Thanos-specific metadata.
// The error from reading or decoding the file is returned unchanged.
func ReadBlockMeta(blockDir string) (*BlockMeta, error) {
	raw, err := os.ReadFile(filepath.Join(blockDir, "meta.json"))
	if err != nil {
		return nil, err
	}
	meta := new(BlockMeta)
	if err = json.Unmarshal(raw, meta); err != nil {
		return nil, err
	}
	return meta, nil
}
// IsDownsampled reports whether the block holds downsampled data,
// i.e. whether its resolution is greater than zero.
func (m *BlockMeta) IsDownsampled() bool {
	resolution := m.Thanos.Downsample.Resolution
	return resolution > 0
}
// Resolution returns the downsampling resolution recorded in the block
// metadata, converted to a ResolutionLevel.
func (m *BlockMeta) Resolution() ResolutionLevel {
	downsample := m.Thanos.Downsample
	return ResolutionLevel(downsample.Resolution)
}
// ResolutionSuffix returns the suffix to append to metric names for the
// block's resolution: ":5m" or ":1h" for downsampled data, and an empty
// string for raw data or any other resolution.
func (m *BlockMeta) ResolutionSuffix() string {
	level := m.Resolution()
	if level == Resolution5m {
		return ":5m"
	}
	if level == Resolution1h {
		return ":1h"
	}
	return ""
}

View File

@@ -0,0 +1,83 @@
package thanos
import (
"fmt"
"io"
"os"
"path/filepath"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
// BlockInfo contains information about an opened block including Thanos metadata.
// Values are produced by OpenBlocksWithInfo.
type BlockInfo struct {
// Block is the opened block reader.
Block tsdb.BlockReader
// Resolution is the downsampling resolution read from the block's meta.json.
Resolution ResolutionLevel
// IsThanos is set to true for every block returned by OpenBlocksWithInfo.
IsThanos bool
// Closer releases the block's resources (file descriptors, mmap).
// Must be called only after all queriers on this block have been closed.
Closer io.Closer
}
// OpenBlocksWithInfo opens every block found in snapshotDir and returns the
// blocks together with their Thanos metadata.
//
// snapshotDir must be a snapshot directory containing block directories.
// Entries that are not directories, or that have no meta.json file, are
// skipped. Downsampled blocks are opened with an AggrChunkPool configured
// for aggrType; all other blocks are opened with a nil pool.
//
// On failure every block opened so far is closed before the error is returned.
func OpenBlocksWithInfo(snapshotDir string, aggrType AggrType) ([]BlockInfo, error) {
	dirEntries, err := os.ReadDir(snapshotDir)
	if err != nil {
		return nil, fmt.Errorf("failed to read snapshot directory: %w", err)
	}

	var opened []BlockInfo
	for _, de := range dirEntries {
		if !de.IsDir() {
			continue
		}
		dir := filepath.Join(snapshotDir, de.Name())

		// Only directories holding a meta.json file are treated as blocks.
		_, statErr := os.Stat(filepath.Join(dir, "meta.json"))
		if os.IsNotExist(statErr) {
			continue
		}

		thanosMeta, err := ReadBlockMeta(dir)
		if err != nil {
			CloseBlocks(opened)
			return nil, fmt.Errorf("failed to read Thanos metadata for block %s: %w", dir, err)
		}

		// Downsampled blocks need a pool that understands AggrChunk encoding.
		var pool chunkenc.Pool
		if thanosMeta.IsDownsampled() {
			pool = NewAggrChunkPool(aggrType)
		}

		blk, err := tsdb.OpenBlock(nil, dir, pool, nil)
		if err != nil {
			// Release the blocks opened so far before reporting the failure.
			CloseBlocks(opened)
			return nil, fmt.Errorf("failed to open block %s: %w", dir, err)
		}

		opened = append(opened, BlockInfo{
			Block:      blk,
			Resolution: thanosMeta.Resolution(),
			IsThanos:   true,
			Closer:     blk,
		})
	}
	return opened, nil
}
// CloseBlocks closes every block in blocks that has a non-nil Closer.
// Errors returned by the closers are ignored.
// Must be called only after all queriers on these blocks have been closed.
func CloseBlocks(blocks []BlockInfo) {
	for i := range blocks {
		closer := blocks[i].Closer
		if closer == nil {
			continue
		}
		_ = closer.Close()
	}
}

198
app/vmctl/thanos/client.go Normal file
View File

@@ -0,0 +1,198 @@
package thanos
import (
"context"
"fmt"
"time"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb"
)
// Config contains parameters for reading Thanos snapshots.
type Config struct {
// Snapshot is the path to the snapshot directory holding the block directories.
Snapshot string
// Filter restricts which blocks and series are read.
Filter Filter
}
// Filter contains configuration for filtering the timeseries.
type Filter struct {
// TimeMin is the lower time bound in RFC3339 format; empty means unset.
TimeMin string
// TimeMax is the upper time bound in RFC3339 format; empty means unset.
TimeMax string
// Label is the name of the label the series matcher is applied to.
Label string
// LabelValue is the regular expression the value of Label must match.
LabelValue string
}
// Client reads Thanos snapshot blocks, including downsampled blocks with AggrChunk encoding.
type Client struct {
// snapshotPath is the snapshot directory passed in Config.Snapshot.
snapshotPath string
// filter is the parsed form of Config.Filter.
filter filter
// statsPrinted is set once Explore has printed the snapshot stats,
// so that later Explore calls do not print them again.
statsPrinted bool
}
// filter is the parsed form of Filter.
// A zero min or max means that the corresponding bound is not set.
type filter struct {
	min        int64
	max        int64
	label      string
	labelValue string
}

// inRange reports whether the interval [minV, maxV] overlaps the filter's
// time range. An unset (zero) bound is replaced by the matching bound of the
// interval being tested.
func (f filter) inRange(minV, maxV int64) bool {
	lower := minV
	if f.min != 0 {
		lower = f.min
	}
	upper := maxV
	if f.max != 0 {
		upper = f.max
	}
	if minV > upper {
		return false
	}
	return lower <= maxV
}
// NewClient creates a new Thanos snapshot client from cfg.
//
// The time bounds in cfg.Filter are parsed from RFC3339 strings into
// millisecond timestamps; an empty bound is left unset (zero).
// An error wrapping the underlying parse failure is returned when either
// bound cannot be parsed, so callers can inspect it with errors.Is/errors.As.
func NewClient(cfg Config) (*Client, error) {
	minTime, maxTime, err := parseTime(cfg.Filter.TimeMin, cfg.Filter.TimeMax)
	if err != nil {
		// %w keeps the original error in the chain; the message is unchanged.
		return nil, fmt.Errorf("failed to parse time in filter: %w", err)
	}
	return &Client{
		snapshotPath: cfg.Snapshot,
		filter: filter{
			min:        minTime,
			max:        maxTime,
			label:      cfg.Filter.Label,
			labelValue: cfg.Filter.LabelValue,
		},
	}, nil
}
// Explore opens all blocks of the snapshot and returns those whose time range
// overlaps the configured filter. Blocks are opened with a custom pool able
// to decode Thanos AggrChunk encoding (0xff), so downsampled blocks are
// supported; aggrType selects the aggregate exposed by that pool.
//
// Blocks outside the filter range are closed and counted as skipped.
// Snapshot stats are printed to stdout on the first call only.
func (c *Client) Explore(aggrType AggrType) ([]BlockInfo, error) {
	opened, err := OpenBlocksWithInfo(c.snapshotPath, aggrType)
	if err != nil {
		return nil, fmt.Errorf("failed to open blocks: %w", err)
	}

	filtered := c.filter.min != 0 || c.filter.max != 0 || c.filter.label != ""
	stats := &Stats{
		Filtered: filtered,
		Blocks:   len(opened),
	}

	var selected []BlockInfo
	for _, info := range opened {
		meta := info.Block.Meta()

		// Track the overall time range across all blocks, skipped ones included.
		if stats.MinTime == 0 || meta.MinTime < stats.MinTime {
			stats.MinTime = meta.MinTime
		}
		if stats.MaxTime == 0 || meta.MaxTime > stats.MaxTime {
			stats.MaxTime = meta.MaxTime
		}

		if c.filter.inRange(meta.MinTime, meta.MaxTime) {
			stats.Samples += meta.Stats.NumSamples
			stats.Series += meta.Stats.NumSeries
			selected = append(selected, info)
			continue
		}

		// The block is outside the requested range: release it right away.
		stats.SkippedBlocks++
		if info.Closer != nil {
			_ = info.Closer.Close()
		}
	}

	if !c.statsPrinted {
		fmt.Println(stats)
		c.statsPrinted = true
	}
	return selected, nil
}
// querierSeriesSet couples a SeriesSet with the Querier that produced it and
// makes sure the querier gets closed once the SeriesSet is fully consumed.
// Closing the querier releases its read reference on the block, which is
// required for Block.Close() to complete without hanging.
type querierSeriesSet struct {
	storage.SeriesSet
	q      storage.Querier
	closed bool
}

// Next advances the underlying SeriesSet. Once it is exhausted the querier
// is closed to release its resources.
func (s *querierSeriesSet) Next() bool {
	hasNext := s.SeriesSet.Next()
	if !hasNext {
		s.Close()
	}
	return hasNext
}

// Close closes the underlying querier; repeated calls are no-ops.
// It must be called when iteration stops early (before Next returns false),
// otherwise the block read reference is kept and Block.Close() may hang.
func (s *querierSeriesSet) Close() {
	if s.closed {
		return
	}
	_ = s.q.Close()
	s.closed = true
}
// ClosableSeriesSet extends storage.SeriesSet with a Close method for explicit cleanup.
// It is the type returned by Client.Read.
type ClosableSeriesSet interface {
storage.SeriesSet
// Close releases the resources held by the series set.
// The implementation in this file (querierSeriesSet) makes repeated calls a no-op.
Close()
}
// Read returns the series of the block bi that match the configured time
// and label filters.
//
// The query time range defaults to the block's own [MinTime, MaxTime] and is
// overridden by the filter bounds that are set (non-zero). Series are
// selected with a regexp matcher built from the filter's label name and
// label value pattern.
//
// The returned ClosableSeriesSet closes the underlying querier automatically
// when fully consumed, but Close() should still be called explicitly
// (e.g., via defer) to handle early returns.
//
// An error is returned when the label value pattern is not a valid regular
// expression or when the block querier cannot be created.
func (c *Client) Read(bi BlockInfo) (ClosableSeriesSet, error) {
	// Build the matcher first and without the Must* variant: the pattern
	// comes from user configuration, so an invalid regexp must surface as an
	// error rather than a panic, and no querier is left open on failure.
	matcher, err := labels.NewMatcher(labels.MatchRegexp, c.filter.label, c.filter.labelValue)
	if err != nil {
		return nil, fmt.Errorf("invalid label filter %q=~%q: %w", c.filter.label, c.filter.labelValue, err)
	}

	meta := bi.Block.Meta()
	minTime, maxTime := meta.MinTime, meta.MaxTime
	if c.filter.min != 0 {
		minTime = c.filter.min
	}
	if c.filter.max != 0 {
		maxTime = c.filter.max
	}

	q, err := tsdb.NewBlockQuerier(bi.Block, minTime, maxTime)
	if err != nil {
		return nil, err
	}
	ss := q.Select(context.Background(), false, nil, matcher)
	return &querierSeriesSet{
		SeriesSet: ss,
		q:         q,
	}, nil
}
// parseTime converts the RFC3339 strings start and end into Unix timestamps
// in milliseconds.
//
// An empty string leaves the corresponding bound unset and yields 0.
// start is parsed before end; the first parse failure is returned, wrapped
// so that callers can inspect the underlying *time.ParseError.
func parseTime(start, end string) (int64, int64, error) {
	toMillis := func(v string) (int64, error) {
		if v == "" {
			return 0, nil
		}
		ts, err := time.Parse(time.RFC3339, v)
		if err != nil {
			return 0, fmt.Errorf("failed to parse %q: %w", v, err)
		}
		// UnixMilli is used instead of UnixNano()/1e6 because UnixNano
		// overflows int64 for dates outside roughly 1678..2262.
		return ts.UnixMilli(), nil
	}

	s, err := toMillis(start)
	if err != nil {
		return 0, 0, err
	}
	e, err := toMillis(end)
	if err != nil {
		return 0, 0, err
	}
	return s, e, nil
}

Some files were not shown because too many files have changed in this diff Show More