Compare commits

...

1 Commits

Author SHA1 Message Date
Artem Fetishev
c4e58c6644 lib/storage: initial implementation of disabling global index
All existing tests pass except TestUnitTest in app/vmalert-tool/unittest/unittest_test.go.
Some changes to existing unit tests and apptests were necessary (such as explicitly enabling the global index).
Tests that verify the behavior with disabled global index will be added in subsequent commits.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-06-30 19:17:53 +02:00
5 changed files with 51 additions and 41 deletions

View File

@@ -83,9 +83,13 @@ var (
cacheSizeIndexDBTagFilters = flagutil.NewBytes("storage.cacheSizeIndexDBTagFilters", 0, "Overrides max size for indexdb/tagFiltersToMetricIDs cache. "+
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
enableGlobalIndex = flag.Bool("enableGlobalIndex", false, "Enable global index. "+
"Deployments with high churn rate should have this index disabled as this decreases disk space usage. "+
"Such deployments may enable global index if the dominant query time range is > 1m as it may slightly improve query performance. "+
"Also see -disablePerDayIndex and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#index-tuning")
disablePerDayIndex = flag.Bool("disablePerDayIndex", false, "Disable per-day index and use global index for all searches. "+
"This may improve performance and decrease disk space usage for the use cases with fixed set of timeseries scattered across a "+
"big time range (for example, when loading years of historical data). "+
"This may improve performance and decrease disk space usage for deployment with no/low churn rate. "+
"Disabling per-day index forces enabling global index and the -enableGlobalIndex flag value is ignored."+
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#index-tuning")
trackMetricNamesStats = flag.Bool("storage.trackMetricNamesStats", true, "Whether to track ingest and query requests for timeseries metric names. "+
"This feature allows to track metric names unused at query requests. "+
@@ -137,6 +141,12 @@ func Init(vmselectMaxConcurrentRequests int, resetCacheIfNeeded func(mrs []stora
if *idbPrefillStart > 23*time.Hour {
logger.Panicf("-storage.idbPrefillStart cannot exceed 23 hours; got %s", idbPrefillStart)
}
disableGlobalIndex := !*enableGlobalIndex
if *disablePerDayIndex {
// In case if per-day index has been disabled, forcibly enable global
// index even if -enableGlobalIndex flag is false.
disableGlobalIndex = false
}
fs.RegisterPathFsMetrics(*storageDataPath)
logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
startTime := time.Now()
@@ -146,6 +156,7 @@ func Init(vmselectMaxConcurrentRequests int, resetCacheIfNeeded func(mrs []stora
DenyQueriesOutsideRetention: *denyQueriesOutsideRetention,
MaxHourlySeries: getMaxHourlySeries(),
MaxDailySeries: getMaxDailySeries(),
DisableGlobalIndex: disableGlobalIndex,
DisablePerDayIndex: *disablePerDayIndex,
TrackMetricNamesStats: *trackMetricNamesStats,
IDBPrefillStart: *idbPrefillStart,

View File

@@ -16,6 +16,7 @@ func TestSingleSearchWithDisabledPerDayIndex(t *testing.T) {
"-storageDataPath=" + tc.Dir() + "/vmsingle",
"-retentionPeriod=100y",
"-search.maxStalenessInterval=1m",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
})
@@ -34,6 +35,7 @@ func TestClusterSearchWithDisabledPerDayIndex(t *testing.T) {
"-httpListenAddr=127.0.0.1:61001",
"-vminsertAddr=127.0.0.1:61002",
"-vmselectAddr=127.0.0.1:61003",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
vmstorage2 := tc.MustStartVmstorage("vmstorage2-"+name, []string{
@@ -42,6 +44,7 @@ func TestClusterSearchWithDisabledPerDayIndex(t *testing.T) {
"-httpListenAddr=127.0.0.1:62001",
"-vminsertAddr=127.0.0.1:62002",
"-vmselectAddr=127.0.0.1:62003",
"-enableGlobalIndex=true",
fmt.Sprintf("-disablePerDayIndex=%t", disablePerDayIndex),
})
vminsert := tc.MustStartVminsert("vminsert-"+name, []string{

View File

@@ -436,18 +436,6 @@ func (db *indexDB) createGlobalIndexes(tsid *TSID, mn *MetricName) {
ii := getIndexItems()
defer putIndexItems(ii)
if db.s.disablePerDayIndex {
// Create metricName -> TSID entry.
// This index is used for searching a TSID by metric name during data
// ingestion or metric name registration when -disablePerDayIndex flag
// is set.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricNameToTSID)
ii.B = mn.Marshal(ii.B)
ii.B = append(ii.B, kvSeparatorChar)
ii.B = tsid.Marshal(ii.B)
ii.Next()
}
// Create metricID -> metricName entry.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricIDToMetricName)
ii.B = encoding.MarshalUint64(ii.B, tsid.MetricID)
@@ -460,11 +448,20 @@ func (db *indexDB) createGlobalIndexes(tsid *TSID, mn *MetricName) {
ii.B = tsid.Marshal(ii.B)
ii.Next()
// Create tag -> metricID entries for every tag in mn.
kb := kbPool.Get()
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
ii.registerTagIndexes(kb.B, mn, tsid.MetricID)
kbPool.Put(kb)
if !db.s.disableGlobalIndex {
// Create metricName -> TSID entry.
ii.B = marshalCommonPrefix(ii.B, nsPrefixMetricNameToTSID)
ii.B = mn.Marshal(ii.B)
ii.B = append(ii.B, kvSeparatorChar)
ii.B = tsid.Marshal(ii.B)
ii.Next()
// Create tag -> metricID entries for every tag in mn.
kb := kbPool.Get()
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
ii.registerTagIndexes(kb.B, mn, tsid.MetricID)
kbPool.Put(kb)
}
db.tb.AddItems(ii.Items)
}
@@ -759,6 +756,7 @@ func (db *indexDB) SearchLabelValues(qt *querytracer.Tracer, labelName string, t
func filterLabelValues(lvs map[string]struct{}, tf *tagFilter, key string) {
var b []byte
for lv := range lvs {
// TODO
b = marshalCommonPrefix(b[:0], nsPrefixTagToMetricIDs)
b = marshalTagValue(b, bytesutil.ToUnsafeBytes(key))
b = marshalTagValue(b, bytesutil.ToUnsafeBytes(lv))
@@ -1242,12 +1240,9 @@ func (db *indexDB) GetSeriesCount(deadline uint64) (uint64, error) {
func (is *indexSearch) getSeriesCount() (uint64, error) {
ts := &is.ts
kb := &is.kb
mp := &is.mp
loopsPaceLimiter := 0
var metricIDsLen uint64
// Extract the number of series from ((__name__=value): metricIDs) rows
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
kb.B = marshalTagValue(kb.B, nil)
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID)
ts.Seek(kb.B)
for ts.NextItem() {
if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 {
@@ -1260,19 +1255,10 @@ func (is *indexSearch) getSeriesCount() (uint64, error) {
if !bytes.HasPrefix(item, kb.B) {
break
}
tail := item[len(kb.B):]
n := bytes.IndexByte(tail, tagSeparatorChar)
if n < 0 {
return 0, fmt.Errorf("invalid tag->metricIDs line %q: cannot find tagSeparatorChar %d", item, tagSeparatorChar)
}
tail = tail[n+1:]
if err := mp.InitOnlyTail(item, tail); err != nil {
return 0, err
}
// Take into account deleted timeseries too.
// It is OK if series can be counted multiple times in rare cases -
// the returned number is an estimation.
metricIDsLen += uint64(mp.MetricIDsLen())
metricIDsLen++
}
if err := ts.Error(); err != nil {
return 0, fmt.Errorf("error when counting unique timeseries: %w", err)
@@ -1529,10 +1515,11 @@ func (db *indexDB) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters, maxM
is := db.getIndexSearch(noDeadline)
defer db.putIndexSearch(is)
// Unconditionally search global index since a given day in per-day
// index may not contain the full set of metricIDs that correspond
// to the tfss.
metricIDs, err := is.searchMetricIDs(qt, tfss, globalIndexTimeRange, maxMetrics)
tr := globalIndexTimeRange
if db.s.disableGlobalIndex {
tr = db.tr
}
metricIDs, err := is.searchMetricIDs(qt, tfss, tr, maxMetrics)
if err != nil {
return nil, db.wrapError("delete series", err)
}
@@ -1979,6 +1966,7 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(qt *querytracer.Tracer,
qt.Printf("sort %d metric ids", len(sortedMetricIDs))
kb := &is.kb
// TODO
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
tfs = removeCompositeTagFilters(tfs, kb.B)
@@ -2089,6 +2077,7 @@ func hasCompositeTagFilters(tfs []*tagFilter, prefix []byte) bool {
}
func matchTagFilters(mn *MetricName, tfs []*tagFilter, kb *bytesutil.ByteBuffer) (bool, error) {
// TODO
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
for i, tf := range tfs {
if bytes.Equal(tf.key, graphiteReverseTagKey) {
@@ -3248,6 +3237,7 @@ func (mp *tagToMetricIDsRowParser) GetMatchingSeriesCount(filter, negativeFilter
}
func mergeTagToMetricIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) {
// TODO
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixTagToMetricIDs)
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixDateTagToMetricIDs)
return data, items

View File

@@ -2,10 +2,10 @@ package storage
import (
"bytes"
"math"
"path/filepath"
"strconv"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
@@ -79,7 +79,7 @@ func mustOpenLegacyIndexDB(path string, s *Storage) *legacyIndexDB {
tr := TimeRange{
MinTimestamp: 0,
MaxTimestamp: math.MaxInt64,
MaxTimestamp: time.Now().UnixMilli(),
}
idb := mustOpenIndexDB(id, tr, name, path, s, &s.isReadOnly, true)
legacyIDB := &legacyIndexDB{idb: idb}

View File

@@ -79,6 +79,7 @@ type Storage struct {
// compatibility with partition index.
legacyIndexDBs atomic.Pointer[legacyIndexDBs]
disableGlobalIndex bool
disablePerDayIndex bool
tb *table
@@ -167,6 +168,7 @@ type OpenOptions struct {
DenyQueriesOutsideRetention bool
MaxHourlySeries int
MaxDailySeries int
DisableGlobalIndex bool
DisablePerDayIndex bool
TrackMetricNamesStats bool
IDBPrefillStart time.Duration
@@ -262,6 +264,10 @@ func MustOpenStorage(path string, opts OpenOptions) *Storage {
fs.MustMkdirIfNotExist(metadataDir)
s.minTimestampForCompositeIndex = mustGetMinTimestampForCompositeIndex(metadataDir, isEmptyDB)
if opts.DisableGlobalIndex && opts.DisablePerDayIndex {
logger.Panicf("BUG: global and per-day indexes cannot be disabled at the same time")
}
s.disableGlobalIndex = opts.DisableGlobalIndex
s.disablePerDayIndex = opts.DisablePerDayIndex
// Load legacy indexDBs.
@@ -1733,7 +1739,7 @@ func (s *Storage) adjustTimeRange(searchTR, idbTR TimeRange) TimeRange {
// For legacy IndexDBs only, partition indexDBs can't span more than a
// month.
minDate, maxDate := tr.DateRange()
if maxDate-minDate > maxDaysForPerDaySearch {
if !s.disableGlobalIndex && maxDate-minDate > maxDaysForPerDaySearch {
return globalIndexTimeRange
}
@@ -1741,7 +1747,7 @@ func (s *Storage) adjustTimeRange(searchTR, idbTR TimeRange) TimeRange {
// the idb time range, then return globalIndexTimeRange to indicate that we
// want to search the global index since the entire index db needs to be
// searched anyway.
if tr == idbTR {
if !s.disableGlobalIndex && tr == idbTR {
return globalIndexTimeRange
}