Compare commits

..

1 Commits

5 changed files with 19 additions and 30 deletions

View File

@@ -263,7 +263,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
if !httpserver.CheckAuthFlag(w, r, resetCacheAuthKey) {
return true
}
return prometheus.ResetRollupResultCacheHandler(w, r)
promql.ResetRollupResultCache()
return true
}
if path == "/admin/tenants" {
tenantsRequests.Inc()

View File

@@ -51,8 +51,8 @@ var (
"If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored")
maxStepForPointsAdjustment = flag.Duration("search.maxStepForPointsAdjustment", time.Minute, "The maximum step when /api/v1/query_range handler adjusts "+
"points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data")
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call with 'propagate=1' argument. "+
"If this flag or the 'propagate' argument isn't set, then cache need to be purged from each vmselect individually. "+
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call. "+
"If this flag isn't set, then cache need to be purged from each vmselect individually. "+
"Comma-separated addresses of vmselect nodes; usage: -selectNode=vmselect-host1,...,vmselect-hostN")
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. "+
@@ -562,7 +562,7 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
// Reset rollup result cache on all the vmselect nodes,
// since the cache may contain deleted data.
// TODO: reset only cache for (account, project)
resetRollupResultCachesAndPropagate()
resetRollupResultCaches()
}
logger.Infof("/api/v1/admin/tsdb/delete_series has been called for %q. Deleted %d series.", sq.FiltersString(), deletedCount)
return nil
@@ -570,30 +570,11 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
// ResetRollupResultCacheHandler serves the `/internal/resetRollupResultCache` API.
//
// The reset mode is selected by the `propagate` request argument:
//   - when httputil.GetBool reports true for it, resetRollupResultCachesAndPropagate
//     is called, which additionally handles propagation to the nodes listed in -selectNode;
//   - otherwise only resetRollupResultCaches is called.
//
// The handler always returns true.
func ResetRollupResultCacheHandler(w http.ResponseWriter, r *http.Request) bool {
	if httputil.GetBool(r, "propagate") {
		// The caller explicitly asked to fan the reset out to the other vmselect nodes.
		resetRollupResultCachesAndPropagate()
		return true
	}
	// No propagation requested: reset only the cache of this node.
	resetRollupResultCaches()
	return true
}
// resetRollupResultCaches counts the call in resetRollupResultCacheCalls
// and resets the local rollup result cache via promql.ResetRollupResultCache.
func resetRollupResultCaches() {
resetRollupResultCacheCalls.Inc()
// Reset local cache before checking whether selectNodes list is empty.
// This guarantees that at least local cache is reset if selectNodes list is empty.
promql.ResetRollupResultCache()
}
func resetRollupResultCachesAndPropagate() {
resetRollupResultCaches()
if len(*selectNodes) == 0 {
logger.Warnf("missing -selectNode flag, cache reset request wont be propagated to the other vmselect nodes." +
"This can be fixed by enumerating all the vmselect node addresses in `-selectNode` command line flag. " +

View File

@@ -35,7 +35,6 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix a rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for the contribution.
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
+* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate cache reset operation to `selectNode` when `/internal/resetRollupResultCache` is called. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)

View File

@@ -622,13 +622,11 @@ curl -Is http://localhost:8428/internal/resetRollupResultCache
Cluster version of VictoriaMetrics:
```sh
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache?propagate=1
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache
```
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag when `propagate=1` argument is set.
If this flag or the `propagate` argument isn't set, then the cache needs to be purged from each vmselect individually.
If `-search.resetCacheAuthKey` is set, automatic propagation will fail and must be triggered manually.
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag. If this
flag isn't set, then the cache needs to be purged from each vmselect individually.
### TCP and UDP

View File

@@ -811,10 +811,20 @@ func LogError(req *http.Request, errStr string) {
logger.Errorf("uri: %s, remote address: %q: %s", uri, remoteAddr, errStr)
}
// tlsErrorSkipLogger must be passed as the out argument to log.New only.
// It suppresses noisy TCP probe errors on TLS connections to avoid log pollution.
//
// This cannot be implemented in net.Listener because a TLS handshake may take seconds,
// during which no other connections can be accepted. Therefore, the implementation inside net.Listener can lead to DoS.
// Once a connection is passed to the conn serve goroutine, there is no direct access to the handshake logic, so this indirect
// approach is used instead.
type tlsErrorSkipLogger struct{}
// Write filters out TLS handshake errors from health-check probes.
// log.Logger guarantees that each complete message is delivered in a single Write call
// and that calls are serialized, so we can safely inspect p for a "TLS handshake error".
// See https://github.com/golang/go/blob/38e988efb4b8f5e73e887027f386a342c138b649/src/log/log.go#L53-L57
func (*tlsErrorSkipLogger) Write(p []byte) (int, error) {
// skip common health check errors produced by Kubernetes and other tools
if bytes.Contains(p, []byte("TLS handshake error")) &&
(bytes.Contains(p, []byte("EOF")) || bytes.Contains(p, []byte("connection reset by peer"))) {
return len(p), nil