mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-19 16:53:32 +03:00
Compare commits
9 Commits
cluster
...
fix/reset-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47176dbc37 | ||
|
|
43bac32761 | ||
|
|
1ea4884d8a | ||
|
|
83d387c6bb | ||
|
|
9ef8cc0036 | ||
|
|
c27ca50fd1 | ||
|
|
34e858888f | ||
|
|
7e72ecb00a | ||
|
|
3ad3ae7f16 |
@@ -56,7 +56,6 @@ var (
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication for details")
|
||||
deleteAuthKey = flagutil.NewPassword("deleteAuthKey", "authKey for metrics' deletion via /prometheus/api/v1/admin/tsdb/delete_series and /graphite/tags/delSeries. It could be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
resetCacheAuthKey = flagutil.NewPassword("search.resetCacheAuthKey", "Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call. It could be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
metricNamesStatsResetAuthKey = flagutil.NewPassword("metricNamesStatsResetAuthKey", "authKey for resetting metric names usage cache via /api/v1/admin/status/metric_names_stats/reset. It overrides -httpAuth.*. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#track-ingested-metrics-usage")
|
||||
|
||||
@@ -260,11 +259,10 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
if path == "/internal/resetRollupResultCache" {
|
||||
if !httpserver.CheckAuthFlag(w, r, resetCacheAuthKey) {
|
||||
if !httpserver.CheckAuthFlag(w, r, prometheus.GetResetCacheAuthKey()) {
|
||||
return true
|
||||
}
|
||||
promql.ResetRollupResultCache()
|
||||
return true
|
||||
return prometheus.ResetRollupResultCacheHandler(w, r)
|
||||
}
|
||||
if path == "/admin/tenants" {
|
||||
tenantsRequests.Inc()
|
||||
|
||||
@@ -51,9 +51,10 @@ var (
|
||||
"If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored")
|
||||
maxStepForPointsAdjustment = flag.Duration("search.maxStepForPointsAdjustment", time.Minute, "The maximum step when /api/v1/query_range handler adjusts "+
|
||||
"points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data")
|
||||
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call. "+
|
||||
"If this flag isn't set, then cache need to be purged from each vmselect individually. "+
|
||||
selectNodes = flagutil.NewArrayString("selectNode", "A list of vmselect node addresses to propagate the '/internal/resetRollupResultCache' call with 'propagate=1' argument. "+
|
||||
"If this flag or the 'propagate' argument isn't set, then cache need to be purged from each vmselect individually. "+
|
||||
"Comma-separated addresses of vmselect nodes; usage: -selectNode=vmselect-host1,...,vmselect-hostN")
|
||||
resetCacheAuthKey = flagutil.NewPassword("search.resetCacheAuthKey", "Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call. It could be passed via authKey query arg. It overrides -httpAuth.*. It'll be used when reset request is propagate to other -selectNode.")
|
||||
|
||||
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. "+
|
||||
"The limit can't exceed the explicitly set corresponding value `-search.maxUniqueTimeseries` on vmstorage side.")
|
||||
@@ -562,7 +563,7 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
|
||||
// Reset rollup result cache on all the vmselect nodes,
|
||||
// since the cache may contain deleted data.
|
||||
// TODO: reset only cache for (account, project)
|
||||
resetRollupResultCaches()
|
||||
resetRollupResultCachesAndPropagate()
|
||||
}
|
||||
logger.Infof("/api/v1/admin/tsdb/delete_series has been called for %q. Deleted %d series.", sq.FiltersString(), deletedCount)
|
||||
return nil
|
||||
@@ -570,33 +571,69 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error {
|
||||
|
||||
var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
|
||||
|
||||
// ResetRollupResultCacheHandler handles requests to the `/internal/resetRollupResultCache` API.
|
||||
// It always resets the local rollup result cache and, if the `propagate` query arg is set, also forwards the reset to the vmselect nodes listed in -selectNode. It always returns true.
|
||||
func ResetRollupResultCacheHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
// Requests without the `propagate` arg reset only the local cache. This includes requests forwarded by another vmselect,
|
||||
// since forwarded requests are sent without the `propagate` arg; this way a forwarded reset is not forwarded again.
|
||||
propagate := httputil.GetBool(r, "propagate")
|
||||
if !propagate {
|
||||
resetRollupResultCaches()
|
||||
return true
|
||||
}
|
||||
// The `propagate` arg is set: reset the local cache and forward the reset to the vmselect nodes listed in -selectNode.
|
||||
resetRollupResultCachesAndPropagate()
|
||||
return true
|
||||
}
|
||||
|
||||
// GetResetCacheAuthKey returns the *flagutil.Password backing the -search.resetCacheAuthKey command-line flag.
|
||||
func GetResetCacheAuthKey() *flagutil.Password {
|
||||
return resetCacheAuthKey
|
||||
}
|
||||
|
||||
// resetRollupResultCaches increments the resetRollupResultCacheCalls counter and resets the local rollup result cache.
func resetRollupResultCaches() {
|
||||
resetRollupResultCacheCalls.Inc()
|
||||
// Reset only the local rollup result cache; the -selectNode list is not consulted here.
|
||||
// Propagation to the other vmselect nodes is done by resetRollupResultCachesAndPropagate.
|
||||
promql.ResetRollupResultCache()
|
||||
}
|
||||
|
||||
func resetRollupResultCachesAndPropagate() {
|
||||
resetRollupResultCaches()
|
||||
if len(*selectNodes) == 0 {
|
||||
logger.Warnf("missing -selectNode flag, cache reset request wont be propagated to the other vmselect nodes." +
|
||||
"This can be fixed by enumerating all the vmselect node addresses in `-selectNode` command line flag. " +
|
||||
" For example: -selectNode=select-addr-1:8481,select-addr-2:8481")
|
||||
return
|
||||
}
|
||||
rcAuthKey := GetResetCacheAuthKey().Get()
|
||||
for _, selectNode := range *selectNodes {
|
||||
normalizedAddr, err := netutil.NormalizeAddr(selectNode, 8481)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot normalize -selectNode=%q: %s", selectNode, err)
|
||||
}
|
||||
selectNode = normalizedAddr
|
||||
callURL := fmt.Sprintf("http://%s/internal/resetRollupResultCache", selectNode)
|
||||
resp, err := httpClient.Get(callURL)
|
||||
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s/internal/resetRollupResultCache", selectNode), nil)
|
||||
if err != nil {
|
||||
logger.Errorf("error when accessing %q: %s", callURL, err)
|
||||
logger.Errorf("cannot create cache reset request for %q: %s", selectNode, err)
|
||||
resetRollupResultCacheErrors.Inc()
|
||||
continue
|
||||
}
|
||||
// usually `-search.resetCacheAuthKey` is set to the same on each vmselect. it's good to propagate with this argument.
|
||||
if rcAuthKey != "" {
|
||||
q := req.URL.Query()
|
||||
q.Add("authKey", rcAuthKey)
|
||||
req.URL.RawQuery = q.Encode()
|
||||
}
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("error when accessing %q: %s", req.URL.String(), err)
|
||||
resetRollupResultCacheErrors.Inc()
|
||||
continue
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_ = resp.Body.Close()
|
||||
logger.Errorf("unexpected status code at %q; got %d; want %d", callURL, resp.StatusCode, http.StatusOK)
|
||||
logger.Errorf("unexpected status code at %q; got %d; want %d", req.URL.String(), resp.StatusCode, http.StatusOK)
|
||||
resetRollupResultCacheErrors.Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate the rollup result cache reset to the vmselect nodes listed in `-selectNode` when `/internal/resetRollupResultCache` is called with the `propagate=1` query arg. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
|
||||
@@ -622,11 +622,13 @@ curl -Is http://localhost:8428/internal/resetRollupResultCache
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache?propagate=1
|
||||
```
|
||||
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag. If this
|
||||
flag isn't set, then cache need to be purged from each vmselect individually.
|
||||
vmselect propagates this call to the rest of the vmselects listed in its `-selectNode` command-line flag when the `propagate=1` query argument is set.
|
||||
If this flag or the `propagate` argument isn't set, then the cache needs to be purged from each vmselect individually.
|
||||
|
||||
If `-search.resetCacheAuthKey` is set, its value is attached to the propagated requests as the `authKey` query argument.
|
||||
|
||||
### TCP and UDP
|
||||
|
||||
|
||||
Reference in New Issue
Block a user