mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-16 23:33:11 +03:00
Compare commits
1 Commits
fix/reset-
...
cluster
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb8a6c9902 |
@@ -263,7 +263,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if !httpserver.CheckAuthFlag(w, r, resetCacheAuthKey) {
|
||||
return true
|
||||
}
|
||||
return prometheus.ResetRollupResultCacheHandler(w, r)
|
||||
promql.ResetRollupResultCache()
|
||||
return true
|
||||
}
|
||||
if path == "/admin/tenants" {
|
||||
tenantsRequests.Inc()
|
||||
|
||||
@@ -51,8 +51,8 @@ var (
|
||||
"If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored")
|
||||
maxStepForPointsAdjustment = flag.Duration("search.maxStepForPointsAdjustment", time.Minute, "The maximum step when /api/v1/query_range handler adjusts "+
|
||||
"points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data")
|
||||
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call with 'propagate=1' argument. "+
|
||||
"If this flag or the 'propagate' argument isn't set, then cache need to be purged from each vmselect individually. "+
|
||||
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call. "+
|
||||
"If this flag isn't set, then cache need to be purged from each vmselect individually. "+
|
||||
"Comma-separated addresses of vmselect nodes; usage: -selectNode=vmselect-host1,...,vmselect-hostN")
|
||||
|
||||
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. "+
|
||||
@@ -562,7 +562,7 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
|
||||
// Reset rollup result cache on all the vmselect nodes,
|
||||
// since the cache may contain deleted data.
|
||||
// TODO: reset only cache for (account, project)
|
||||
resetRollupResultCachesAndPropagate()
|
||||
resetRollupResultCaches()
|
||||
}
|
||||
logger.Infof("/api/v1/admin/tsdb/delete_series has been called for %q. Deleted %d series.", sq.FiltersString(), deletedCount)
|
||||
return nil
|
||||
@@ -570,30 +570,11 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
|
||||
|
||||
var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
|
||||
// ResetRollupResultCacheHandler handles requests to the `/internal/resetRollupResultCache` API.
|
||||
// It propagates the request if `propagate` argument is set.
|
||||
func ResetRollupResultCacheHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
// check if this is a propagated request from another vmselect, by propagate argument.
|
||||
// - if yes: simply execute and return.
|
||||
propagate := httputil.GetBool(r, "propagate")
|
||||
if !propagate {
|
||||
resetRollupResultCaches()
|
||||
return true
|
||||
}
|
||||
// - if no: it's manual request and need to propagate to other vmselect(s).
|
||||
resetRollupResultCachesAndPropagate()
|
||||
return true
|
||||
}
|
||||
|
||||
func resetRollupResultCaches() {
|
||||
resetRollupResultCacheCalls.Inc()
|
||||
// Reset local cache before checking whether selectNodes list is empty.
|
||||
// This guarantees that at least local cache is reset if selectNodes list is empty.
|
||||
promql.ResetRollupResultCache()
|
||||
}
|
||||
|
||||
func resetRollupResultCachesAndPropagate() {
|
||||
resetRollupResultCaches()
|
||||
if len(*selectNodes) == 0 {
|
||||
logger.Warnf("missing -selectNode flag, cache reset request wont be propagated to the other vmselect nodes." +
|
||||
"This can be fixed by enumerating all the vmselect node addresses in `-selectNode` command line flag. " +
|
||||
|
||||
@@ -35,7 +35,6 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
+* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate cache reset operation to `selectNode` when `/internal/resetRollupResultCache` is called. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
|
||||
@@ -622,13 +622,11 @@ curl -Is http://localhost:8428/internal/resetRollupResultCache
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache?propagate=1
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache
|
||||
```
|
||||
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag when `propagate=1` argument is set.
|
||||
If this flag or the `propagate` argument isn't set, then the cache needs to be purged from each vmselect individually.
|
||||
|
||||
If `-search.resetCacheAuthKey` is set, automatic propagation will fail and must be triggered manually.
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag. If this
|
||||
flag isn't set, then the cache needs to be purged from each vmselect individually.
|
||||
|
||||
### TCP and UDP
|
||||
|
||||
|
||||
@@ -811,10 +811,20 @@ func LogError(req *http.Request, errStr string) {
|
||||
logger.Errorf("uri: %s, remote address: %q: %s", uri, remoteAddr, errStr)
|
||||
}
|
||||
|
||||
// tlsErrorSkipLogger must be passed as the out argument to log.New only.
|
||||
// It suppresses noisy TCP probe errors on TLS connections to avoid log pollution.
|
||||
//
|
||||
// This cannot be implemented in net.Listener because a TLS handshake may take seconds,
|
||||
// during which no other connections can be accepted. Therefore, the implementation inside net.Listener can lead to DoS.
|
||||
// Once a connection is passed to the conn serve goroutine, there is no direct access to the handshake logic, so this indirect
|
||||
// approach is used instead.
|
||||
type tlsErrorSkipLogger struct{}
|
||||
|
||||
// Write filters out TLS handshake errors from health-check probes.
|
||||
// log.Logger guarantees that each complete message is delivered in a single Write call
|
||||
// and that calls are serialized, so we can safely inspect p for a "TLS handshake error".
|
||||
// See https://github.com/golang/go/blob/38e988efb4b8f5e73e887027f386a342c138b649/src/log/log.go#L53-L57
|
||||
func (*tlsErrorSkipLogger) Write(p []byte) (int, error) {
|
||||
// skip common health check errors produced by Kubernetes and other tools
|
||||
if bytes.Contains(p, []byte("TLS handshake error")) &&
|
||||
(bytes.Contains(p, []byte("EOF")) || bytes.Contains(p, []byte("connection reset by peer"))) {
|
||||
return len(p), nil
|
||||
|
||||
Reference in New Issue
Block a user