mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-07-01 14:44:35 +03:00
Compare commits
3 Commits
testonly-m
...
RequestErr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2edf1729d3 | ||
|
|
8e5ca7e9b0 | ||
|
|
119ba0fb5b |
@@ -18,7 +18,7 @@ groups:
|
||||
concurrency: 2
|
||||
rules:
|
||||
- alert: RequestErrorsToAPI
|
||||
expr: increase(vm_http_request_errors_total[5m]) > 0
|
||||
expr: increase(vm_http_request_errors_total{path=~".+"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -37,4 +37,4 @@ groups:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
||||
Worth to check logs for specific error messages."
|
||||
Worth to check logs for specific error messages."
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand/v2"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/textproto"
|
||||
@@ -31,6 +32,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -206,6 +208,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
invalidAuthTokenRequests.Inc()
|
||||
slowdownUnauthorizedResponse(r)
|
||||
if *logInvalidAuthTokens {
|
||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
@@ -889,3 +892,20 @@ func debugInfo(u *url.URL, r *http.Request) string {
|
||||
fmt.Fprint(s, ")")
|
||||
return s.String()
|
||||
}
|
||||
|
||||
// slowdownUnauthorizedResponse adds a random delay in the [2..3] seconds range before returning an unauthorized response.
|
||||
// This reduces the effectiveness of brute-force.
|
||||
//
|
||||
// Recommended by OWASP Top10:
|
||||
// https://owasp.org/Top10/2025/A07_2025-Authentication_Failures
|
||||
func slowdownUnauthorizedResponse(r *http.Request) {
|
||||
|
||||
d := 2*time.Second + time.Duration(rand.IntN(1000))*time.Millisecond
|
||||
t := timerpool.Get(d)
|
||||
|
||||
select {
|
||||
case <-t.C:
|
||||
case <-r.Context().Done():
|
||||
}
|
||||
timerpool.Put(t)
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ groups:
|
||||
Consider to limit the ingestion rate, decrease retention or scale the disk space if possible."
|
||||
|
||||
- alert: RequestErrorsToAPI
|
||||
expr: increase(vm_http_request_errors_total[5m]) > 0
|
||||
expr: increase(vm_http_request_errors_total{path=~".+"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -207,9 +207,9 @@ groups:
|
||||
annotations:
|
||||
summary: "IndexDB skipped registering items during data ingestion with reason={{ $labels.reason }}."
|
||||
description: |
|
||||
VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||
VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||
`-maxLabelValueLen` command-line flags.
|
||||
|
||||
- alert: TooManyTSIDMisses
|
||||
@@ -223,4 +223,4 @@ groups:
|
||||
Unexpected TSID misses for \"{{ $labels.job }}\" ({{ $labels.instance }}) for the last 15 minutes.
|
||||
If this happens after unclean shutdown of VictoriaMetrics process (via \"kill -9\", OOM or power off),
|
||||
then this is OK - the alert must go away in a few minutes after the restart.
|
||||
Otherwise this may point to the corruption of index data.
|
||||
Otherwise this may point to the corruption of index data.
|
||||
|
||||
@@ -75,7 +75,7 @@ groups:
|
||||
Consider to limit the ingestion rate, decrease retention or scale the disk space if possible."
|
||||
|
||||
- alert: RequestErrorsToAPI
|
||||
expr: increase(vm_http_request_errors_total[5m]) > 0
|
||||
expr: increase(vm_http_request_errors_total{path=~".+"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -173,9 +173,9 @@ groups:
|
||||
annotations:
|
||||
summary: "IndexDB skipped registering items during data ingestion with reason={{ $labels.reason }}."
|
||||
description: |
|
||||
VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||
VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||
`-maxLabelValueLen` command-line flags.
|
||||
|
||||
- alert: TooManyTSIDMisses
|
||||
@@ -189,4 +189,4 @@ groups:
|
||||
Unexpected TSID misses for \"{{ $labels.job }}\" ({{ $labels.instance }}) for the last 15 minutes.
|
||||
If this happens after unclean shutdown of VictoriaMetrics process (via \"kill -9\", OOM or power off),
|
||||
then this is OK - the alert must go away in a few minutes after the restart.
|
||||
Otherwise this may point to the corruption of index data.
|
||||
Otherwise this may point to the corruption of index data.
|
||||
|
||||
@@ -28,15 +28,16 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
* SECURITY: upgrade base docker image (Alpine) from 3.23.4 to 3.24.1. See [Alpine 3.24.1 release notes](https://www.alpinelinux.org/posts/Alpine-3.24.1-released.html).
|
||||
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `default_vm_access_claim` field into `jwt` section of auth config. It could be used at [JWT claim placeholders](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating), if `JWT` token doesn't have `vm_access` claim. See [#11054](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11054).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): reduces CPU usage by 10% at [sharding among remote storages](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages). See [#11113](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11113). Thanks to @bennf for contribution.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `optimize_repeated_binary_op_subexprs=1` query arg to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) for executing binary operator sides sequentially when they share the same optimized aggregate rollup result expression. This allows the second side to reuse rollup result cache populated by the first side. See [#10575](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10575).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): prevent possible password brute-force attacks with an artificial 2-3 second delay as recommended by [OWASP](https://owasp.org/Top10/2025/A07_2025-Authentication_Failures). See [#11180](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11180).
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: cancel in-flight HTTP requests shortly before `-http.maxGracefulShutdownDuration` elapses during graceful shutdown, so they can drain and the shutdown completes cleanly within that window instead of timing out and exiting via `logger.Fatalf` -> `os.Exit`. This prevents skipping the storage flush and losing in-memory data when long-lived requests are in flight (such as VictoriaLogs live tailing). See [#1502](https://github.com/VictoriaMetrics/VictoriaLogs/issues/1502).
|
||||
* BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fixes unexpected rare rerouting. See [#11162](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11162).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate cache reset operation to `selectNode` when `/internal/resetRollupResultCache` is called. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix possible unexpected increases in `rate_avg` and `rate_sum` if an out-of-order sample is ingested after the previous flush. See [#11140](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11140).
|
||||
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `default_vm_access_claim` field into `jwt` section of auth config. It could be used at [JWT claim placeholders](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating), if `JWT` token doesn't have `vm_access` claim. See [#11054](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11054).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): reduces CPU usage by 10% at [sharding among remote storages](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages). See [#11113](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11113). Thanks to @bennf for contribution.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `optimize_repeated_binary_op_subexprs=1` query arg to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) for executing binary operator sides sequentially when they share the same optimized aggregate rollup result expression. This allows the second side to reuse rollup result cache populated by the first side. See [#10575](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10575).
|
||||
|
||||
## [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.146.0)
|
||||
|
||||
Released at 2026-06-22
|
||||
|
||||
Reference in New Issue
Block a user