Compare commits

..

1 Commits

Author SHA1 Message Date
Artem Fetishev
c4e58c6644 lib/storage: initial implementation of disabling global index
All existing tests pass except TestUnitTest in app/vmalert-tool/unittest/unittest_test.go.
Some changes to existing unit tests and apptests were necessary (such as explicitly enabling the global index).
Tests that verify the behavior with disabled global index will be added in subsequent commits.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-06-30 19:17:53 +02:00
9 changed files with 56 additions and 85 deletions

View File

@@ -6,7 +6,6 @@ import (
"flag"
"fmt"
"io"
"math/rand/v2"
"net"
"net/http"
"net/textproto"
@@ -32,7 +31,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
)
var (
@@ -208,7 +206,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
invalidAuthTokenRequests.Inc()
slowdownUnauthorizedResponse(r)
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
@@ -892,20 +889,3 @@ func debugInfo(u *url.URL, r *http.Request) string {
fmt.Fprint(s, ")")
return s.String()
}
// slowdownUnauthorizedResponse blocks for a random duration of at least 2 and
// less than 3 seconds before an unauthorized response is returned.
// This makes brute-force attempts more expensive.
//
// The wait ends early when the request context is done.
//
// Recommended by OWASP Top10:
// https://owasp.org/Top10/2025/A07_2025-Authentication_Failures
func slowdownUnauthorizedResponse(r *http.Request) {
	jitter := time.Duration(rand.IntN(1000)) * time.Millisecond
	timer := timerpool.Get(2*time.Second + jitter)
	// Hand the timer back to the pool on every exit path.
	defer timerpool.Put(timer)

	select {
	case <-r.Context().Done():
	case <-timer.C:
	}
}

View File

@@ -1687,10 +1687,6 @@ func assertInstantValues(tss []*timeseries) {
// memoryIntensiveQueries is the counter registered under the name
// vm_memory_intensive_queries_total.
var memoryIntensiveQueries = metrics.NewCounter(`vm_memory_intensive_queries_total`)
// The vm_max_memory_per_query gauge reports the value of maxMemoryPerQuery.N
// each time the callback is invoked.
var _ = metrics.NewGauge(`vm_max_memory_per_query`, func() float64 {
return float64(maxMemoryPerQuery.N)
})
func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc,
expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowExpr *metricsql.DurationExpr,
) ([]*timeseries, error) {

View File

@@ -83,9 +83,13 @@ var (
cacheSizeIndexDBTagFilters = flagutil.NewBytes("storage.cacheSizeIndexDBTagFilters", 0, "Overrides max size for indexdb/tagFiltersToMetricIDs cache. "+
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
// enableGlobalIndex opts in to the global index. Init negates it to obtain
// OpenOptions.DisableGlobalIndex, except when -disablePerDayIndex is set:
// in that case the global index is kept enabled regardless of this flag.
enableGlobalIndex = flag.Bool("enableGlobalIndex", false, "Enable global index. "+
"Deployments with high churn rate should have this index disabled as this decreases disk space usage. "+
"Such deployments may enable global index if the dominant query time range is > 1m as it may slightly improve query performance. "+
"Also see -disablePerDayIndex and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#index-tuning")
// disablePerDayIndex is passed to the storage as OpenOptions.DisablePerDayIndex.
// When it is set, Init keeps the global index enabled regardless of the
// -enableGlobalIndex value.
disablePerDayIndex = flag.Bool("disablePerDayIndex", false, "Disable per-day index and use global index for all searches. "+
"This may improve performance and decrease disk space usage for the use cases with fixed set of timeseries scattered across a "+
"big time range (for example, when loading years of historical data). "+
"This may improve performance and decrease disk space usage for deployment with no/low churn rate. "+
// The trailing space separates this sentence from the "See ..." link in the -help output.
"Disabling per-day index forces enabling global index and the -enableGlobalIndex flag value is ignored. "+
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#index-tuning")
trackMetricNamesStats = flag.Bool("storage.trackMetricNamesStats", true, "Whether to track ingest and query requests for timeseries metric names. "+
"This feature allows to track metric names unused at query requests. "+
@@ -137,6 +141,12 @@ func Init(vmselectMaxConcurrentRequests int, resetCacheIfNeeded func(mrs []stora
if *idbPrefillStart > 23*time.Hour {
logger.Panicf("-storage.idbPrefillStart cannot exceed 23 hours; got %s", idbPrefillStart)
}
disableGlobalIndex := !*enableGlobalIndex
if *disablePerDayIndex {
// If the per-day index has been disabled, forcibly enable the global
// index even if the -enableGlobalIndex flag is false.
disableGlobalIndex = false
}
fs.RegisterPathFsMetrics(*storageDataPath)
logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
startTime := time.Now()
@@ -146,6 +156,7 @@ func Init(vmselectMaxConcurrentRequests int, resetCacheIfNeeded func(mrs []stora
DenyQueriesOutsideRetention: *denyQueriesOutsideRetention,
MaxHourlySeries: getMaxHourlySeries(),
MaxDailySeries: getMaxDailySeries(),
DisableGlobalIndex: disableGlobalIndex,
DisablePerDayIndex: *disablePerDayIndex,
TrackMetricNamesStats: *trackMetricNamesStats,
IDBPrefillStart: *idbPrefillStart,

View File

@@ -16,6 +16,7 @@ func TestSingleSearchWithDisabledPerDayIndex(t *testing.T) {
"-storageDataPath=" + tc.Dir() + "/vmsingle",
"-retentionPeriod=100y",
"-search.maxStalenessInterval=1m",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
})
@@ -34,6 +35,7 @@ func TestClusterSearchWithDisabledPerDayIndex(t *testing.T) {
"-httpListenAddr=127.0.0.1:61001",
"-vminsertAddr=127.0.0.1:61002",
"-vmselectAddr=127.0.0.1:61003",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
vmstorage2 := tc.MustStartVmstorage("vmstorage2-"+name, []string{
@@ -42,6 +44,7 @@ func TestClusterSearchWithDisabledPerDayIndex(t *testing.T) {
"-httpListenAddr=127.0.0.1:62001",
"-vminsertAddr=127.0.0.1:62002",
"-vmselectAddr=127.0.0.1:62003",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
vminsert := tc.MustStartVminsert("vminsert-"+name, []string{

View File

@@ -223,16 +223,4 @@ groups:
Unexpected TSID misses for \"{{ $labels.job }}\" ({{ $labels.instance }}) for the last 15 minutes.
If this happens after unclean shutdown of VictoriaMetrics process (via \"kill -9\", OOM or power off),
then this is OK - the alert must go away in a few minutes after the restart.
Otherwise this may point to the corruption of index data.
- alert: VMSelectConcurrentQueriesExceedMemoryLimit
expr: (vm_max_memory_per_query * on(job, instance) vm_concurrent_select_capacity) > on(job, instance) vm_available_memory_bytes
for: 5m
labels:
severity: warning
annotations:
summary: "vmselect ({{ $labels.instance }}) concurrent query memory may exceed pod limit"
description: "Current concurrent queries ({{ $value | humanize1024 }} combined max memory) exceed
the available memory on instance {{ $labels.instance }}.
This may result in OOM kills. Consider reducing -maxConcurrentRequests,
lowering -maxMemoryPerQuery, or scaling up pod memory limits."
Otherwise this may point to the corruption of index data.

View File

@@ -28,18 +28,15 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
* SECURITY: upgrade base docker image (Alpine) from 3.23.4 to 3.24.1. See [Alpine 3.24.1 release notes](https://www.alpinelinux.org/posts/Alpine-3.24.1-released.html).
* FEATURE: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): expose `vm_max_memory_per_query` metric reflecting the `-search.maxMemoryPerQuery` limit. Create `VMSelectConcurrentQueriesExceedMemoryLimit` alert to warn when OOMs are possible due to misconfiguration of `-search.maxMemoryPerQuery` and max concurrent queries.
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `default_vm_access_claim` field into `jwt` section of auth config. It can be used in [JWT claim placeholders](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating) if the `JWT` token doesn't have a `vm_access` claim. See [#11054](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11054).
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): reduce CPU usage by 10% when [sharding among remote storages](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages). See [#11113](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11113). Thanks to @bennf for the contribution.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `optimize_repeated_binary_op_subexprs=1` query arg to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) for executing binary operator sides sequentially when they share the same optimized aggregate rollup result expression. This allows the second side to reuse rollup result cache populated by the first side. See [#10575](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10575).
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): prevent possible password brute-force attacks with an artificial 2-3 second delay as recommended by [OWASP](https://owasp.org/Top10/2025/A07_2025-Authentication_Failures). See [#11180](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11180).
* BUGFIX: all VictoriaMetrics components: cancel in-flight HTTP requests shortly before `-http.maxGracefulShutdownDuration` elapses during graceful shutdown, so they can drain and the shutdown completes cleanly within that window instead of timing out and exiting via `logger.Fatalf` -> `os.Exit`. This prevents skipping the storage flush and losing in-memory data when long-lived requests are in flight (such as VictoriaLogs live tailing). See [#1502](https://github.com/VictoriaMetrics/VictoriaLogs/issues/1502).
* BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix rare unexpected rerouting. See [#11162](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11162).
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate cache reset operation to `selectNode` when `/internal/resetRollupResultCache` is called. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix possible unexpected increases in `rate_avg` and `rate_sum` if an out-of-order sample is ingested after the previous flush. See [#11140](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11140).
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `default_vm_access_claim` field into `jwt` section of auth config. It can be used in [JWT claim placeholders](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating) if the `JWT` token doesn't have a `vm_access` claim. See [#11054](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11054).
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): reduce CPU usage by 10% when [sharding among remote storages](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages). See [#11113](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11113). Thanks to @bennf for the contribution.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `optimize_repeated_binary_op_subexprs=1` query arg to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) for executing binary operator sides sequentially when they share the same optimized aggregate rollup result expression. This allows the second side to reuse rollup result cache populated by the first side. See [#10575](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10575).
## [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.146.0)
Released at 2026-06-22

View File

@@ -436,18 +436,6 @@ func (db *indexDB) createGlobalIndexes(tsid *TSID, mn *MetricName) {
ii := getIndexItems()
defer putIndexItems(ii)
if db.s.disablePerDayIndex {
// Create metricName -> TSID entry.
// This index is used for searching a TSID by metric name during data
// ingestion or metric name registration when -disablePerDayIndex flag
// is set.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricNameToTSID)
ii.B = mn.Marshal(ii.B)
ii.B = append(ii.B, kvSeparatorChar)
ii.B = tsid.Marshal(ii.B)
ii.Next()
}
// Create metricID -> metricName entry.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricIDToMetricName)
ii.B = encoding.MarshalUint64(ii.B, tsid.MetricID)
@@ -460,11 +448,20 @@ func (db *indexDB) createGlobalIndexes(tsid *TSID, mn *MetricName) {
ii.B = tsid.Marshal(ii.B)
ii.Next()
// Create tag -> metricID entries for every tag in mn.
kb := kbPool.Get()
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
ii.registerTagIndexes(kb.B, mn, tsid.MetricID)
kbPool.Put(kb)
if !db.s.disableGlobalIndex {
// Create metricName -> TSID entry.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricNameToTSID)
ii.B = mn.Marshal(ii.B)
ii.B = append(ii.B, kvSeparatorChar)
ii.B = tsid.Marshal(ii.B)
ii.Next()
// Create tag -> metricID entries for every tag in mn.
kb := kbPool.Get()
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
ii.registerTagIndexes(kb.B, mn, tsid.MetricID)
kbPool.Put(kb)
}
db.tb.AddItems(ii.Items)
}
@@ -759,6 +756,7 @@ func (db *indexDB) SearchLabelValues(qt *querytracer.Tracer, labelName string, t
func filterLabelValues(lvs map[string]struct{}, tf *tagFilter, key string) {
var b []byte
for lv := range lvs {
// TODO
b = marshalCommonPrefix(b[:0], nsPrefixTagToMetricIDs)
b = marshalTagValue(b, bytesutil.ToUnsafeBytes(key))
b = marshalTagValue(b, bytesutil.ToUnsafeBytes(lv))
@@ -1242,12 +1240,9 @@ func (db *indexDB) GetSeriesCount(deadline uint64) (uint64, error) {
func (is *indexSearch) getSeriesCount() (uint64, error) {
ts := &is.ts
kb := &is.kb
mp := &is.mp
loopsPaceLimiter := 0
var metricIDsLen uint64
// Extract the number of series from ((__name__=value): metricIDs) rows
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
kb.B = marshalTagValue(kb.B, nil)
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID)
ts.Seek(kb.B)
for ts.NextItem() {
if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 {
@@ -1260,19 +1255,10 @@ func (is *indexSearch) getSeriesCount() (uint64, error) {
if !bytes.HasPrefix(item, kb.B) {
break
}
tail := item[len(kb.B):]
n := bytes.IndexByte(tail, tagSeparatorChar)
if n < 0 {
return 0, fmt.Errorf("invalid tag->metricIDs line %q: cannot find tagSeparatorChar %d", item, tagSeparatorChar)
}
tail = tail[n+1:]
if err := mp.InitOnlyTail(item, tail); err != nil {
return 0, err
}
// Take into account deleted timeseries too.
// It is OK if series can be counted multiple times in rare cases -
// the returned number is an estimation.
metricIDsLen += uint64(mp.MetricIDsLen())
metricIDsLen++
}
if err := ts.Error(); err != nil {
return 0, fmt.Errorf("error when counting unique timeseries: %w", err)
@@ -1529,10 +1515,11 @@ func (db *indexDB) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters, maxM
is := db.getIndexSearch(noDeadline)
defer db.putIndexSearch(is)
// Unconditionally search global index since a given day in per-day
// index may not contain the full set of metricIDs that correspond
// to the tfss.
metricIDs, err := is.searchMetricIDs(qt, tfss, globalIndexTimeRange, maxMetrics)
tr := globalIndexTimeRange
if db.s.disableGlobalIndex {
tr = db.tr
}
metricIDs, err := is.searchMetricIDs(qt, tfss, tr, maxMetrics)
if err != nil {
return nil, db.wrapError("delete series", err)
}
@@ -1979,6 +1966,7 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(qt *querytracer.Tracer,
qt.Printf("sort %d metric ids", len(sortedMetricIDs))
kb := &is.kb
// TODO
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
tfs = removeCompositeTagFilters(tfs, kb.B)
@@ -2089,6 +2077,7 @@ func hasCompositeTagFilters(tfs []*tagFilter, prefix []byte) bool {
}
func matchTagFilters(mn *MetricName, tfs []*tagFilter, kb *bytesutil.ByteBuffer) (bool, error) {
// TODO
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
for i, tf := range tfs {
if bytes.Equal(tf.key, graphiteReverseTagKey) {
@@ -3248,6 +3237,7 @@ func (mp *tagToMetricIDsRowParser) GetMatchingSeriesCount(filter, negativeFilter
}
func mergeTagToMetricIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) {
// TODO
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixTagToMetricIDs)
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixDateTagToMetricIDs)
return data, items

View File

@@ -2,10 +2,10 @@ package storage
import (
"bytes"
"math"
"path/filepath"
"strconv"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
@@ -79,7 +79,7 @@ func mustOpenLegacyIndexDB(path string, s *Storage) *legacyIndexDB {
tr := TimeRange{
MinTimestamp: 0,
MaxTimestamp: math.MaxInt64,
MaxTimestamp: time.Now().UnixMilli(),
}
idb := mustOpenIndexDB(id, tr, name, path, s, &s.isReadOnly, true)
legacyIDB := &legacyIndexDB{idb: idb}

View File

@@ -79,6 +79,7 @@ type Storage struct {
// compatibility with partition index.
legacyIndexDBs atomic.Pointer[legacyIndexDBs]
disableGlobalIndex bool
disablePerDayIndex bool
tb *table
@@ -167,6 +168,7 @@ type OpenOptions struct {
DenyQueriesOutsideRetention bool
MaxHourlySeries int
MaxDailySeries int
DisableGlobalIndex bool
DisablePerDayIndex bool
TrackMetricNamesStats bool
IDBPrefillStart time.Duration
@@ -262,6 +264,10 @@ func MustOpenStorage(path string, opts OpenOptions) *Storage {
fs.MustMkdirIfNotExist(metadataDir)
s.minTimestampForCompositeIndex = mustGetMinTimestampForCompositeIndex(metadataDir, isEmptyDB)
if opts.DisableGlobalIndex && opts.DisablePerDayIndex {
logger.Panicf("BUG: global and per-day indexes cannot be disabled at the same time")
}
s.disableGlobalIndex = opts.DisableGlobalIndex
s.disablePerDayIndex = opts.DisablePerDayIndex
// Load legacy indexDBs.
@@ -1733,7 +1739,7 @@ func (s *Storage) adjustTimeRange(searchTR, idbTR TimeRange) TimeRange {
// For legacy IndexDBs only, partition indexDBs can't span more than a
// month.
minDate, maxDate := tr.DateRange()
if maxDate-minDate > maxDaysForPerDaySearch {
if !s.disableGlobalIndex && maxDate-minDate > maxDaysForPerDaySearch {
return globalIndexTimeRange
}
@@ -1741,7 +1747,7 @@ func (s *Storage) adjustTimeRange(searchTR, idbTR TimeRange) TimeRange {
// the idb time range, then return globalIndexTimeRange to indicate that we
// want to search the global index since the entire index db needs to be
// searched anyway.
if tr == idbTR {
if !s.disableGlobalIndex && tr == idbTR {
return globalIndexTimeRange
}