Compare commits

...

1 Commits

Author SHA1 Message Date
Artem Fetishev
3f1b7a3a7f lib/storage: make lrucache ttl configurable
Previously, the `lrucache` ttl was hardcoded to 3 minutes and the code comment
said it was enough for most queries that are sent to vmstorage repeatedly. But
it appears that it is not always the case.

`indexDB` has this `tagFilters loops cache` that is used in index search
optimizations (see `getMetricIDsForDateAndFilters()`). Until recently, this
cache was implemented with `workingsetcache`. In
[10154](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10154), this
implementation was changed to `lrucache`. After this change, some users reported
huge CPU utilization increase in vmstorage (see
[10297](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10297)).

`workingsetcache` evicts an entry after two rotations that happen every 30
minutes. I.e. the entry ttl is 1h since the last time it was retrieved from the
cache. Hence the assumption that `lrucache` causes the CPU usage to rise
because of its overly aggressive eviction policy.

Reverting back to `workingsetcache` helped but `lrucache` is still preferred
because it occupies less memory. Since `lrucache` is preferred, its ttl needs to
be increased. Instead of changing the hardcoded value, the ttl was made
configurable.

The `tagFilters loops cache` was configured to have ttl of 1h to match the
default behavior of `workingsetcache`.

Although no one complained about this yet, the ttl was also increased for
`tfssCache` because previously it was also implemented with `workingsetcache`.

Finally, the `regexpCache` and `prefixesCache` were also configured to have 1h
ttl because these caches are also used during data retrieval.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-02-09 12:01:24 +01:00
4 changed files with 22 additions and 12 deletions

View File

@@ -28,8 +28,12 @@ type Cache struct {
// NewCache creates new cache.
//
// Cache size in bytes is limited by the value returned by getMaxSizeBytes() callback.
//
// Entries will be removed from the cache if not accessed longer than ttl
// duration but not sooner than ~53 seconds (see cleaner() below).
//
// Call MustStop() in order to free up resources occupied by Cache.
func NewCache(getMaxSizeBytes func() uint64) *Cache {
func NewCache(getMaxSizeBytes func() uint64, ttl time.Duration) *Cache {
cpusCount := cgroup.AvailableCPUs()
shardsCount := cgroup.AvailableCPUs()
// Increase the number of shards with the increased number of available CPU cores.
@@ -45,7 +49,7 @@ func NewCache(getMaxSizeBytes func() uint64) *Cache {
return n / uint64(shardsCount)
}
for i := range shards {
shards[i] = newCache(getMaxShardBytes)
shards[i] = newCache(getMaxShardBytes, ttl)
}
c := &Cache{
shards: shards,
@@ -175,6 +179,11 @@ type cache struct {
// getMaxSizeBytes() is a callback, which returns the maximum allowed cache size in bytes.
getMaxSizeBytes func() uint64
// ttl is the duration in seconds during which an entry is allowed to stay
// in the cache without being retrieved. Once this time passes the entry
// will be removed from the cache.
ttl uint64
// mu protects all the fields below.
mu sync.Mutex
@@ -210,9 +219,10 @@ type cacheEntry struct {
e Entry
}
func newCache(getMaxSizeBytes func() uint64) *cache {
func newCache(getMaxSizeBytes func() uint64, ttl time.Duration) *cache {
var c cache
c.getMaxSizeBytes = getMaxSizeBytes
c.ttl = uint64(ttl.Seconds())
c.m = make(map[string]*cacheEntry)
return &c
}
@@ -231,9 +241,7 @@ func (c *cache) updateSizeBytes(n uint64) {
}
func (c *cache) cleanByTimeout() {
// Delete items accessed more than three minutes ago.
// This time should be enough for repeated queries.
lastAccessTime := fasttime.UnixTimestamp() - 3*60
lastAccessTime := fasttime.UnixTimestamp() - c.ttl
c.mu.Lock()
defer c.mu.Unlock()

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"sync"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
)
@@ -18,7 +19,7 @@ func TestCache(t *testing.T) {
getMaxSize := func() uint64 {
return sizeMaxBytes
}
c := NewCache(getMaxSize)
c := NewCache(getMaxSize, 3*time.Minute)
defer c.MustStop()
if n := c.SizeBytes(); n != 0 {
t.Fatalf("unexpected SizeBytes(); got %d; want %d", n, 0)
@@ -123,7 +124,7 @@ func TestCacheConcurrentAccess(_ *testing.T) {
getMaxSize := func() uint64 {
return sizeMaxBytes
}
c := NewCache(getMaxSize)
c := NewCache(getMaxSize, 3*time.Minute)
defer c.MustStop()
workers := 5

View File

@@ -173,7 +173,7 @@ func mustOpenIndexDB(id uint64, tr TimeRange, name, path string, s *Storage, isR
logger.Panicf("BUG: Storage must not be nil")
}
tfssCache := lrucache.NewCache(getTagFiltersCacheSize)
tfssCache := lrucache.NewCache(getTagFiltersCacheSize, 1*time.Hour)
tb := mergeset.MustOpenTable(path, dataFlushInterval, tfssCache.Reset, mergeTagToMetricIDsRows, isReadOnly)
db := &indexDB{
legacyMinMissingTimestampByKey: make(map[string]int64),
@@ -183,7 +183,7 @@ func mustOpenIndexDB(id uint64, tr TimeRange, name, path string, s *Storage, isR
tb: tb,
s: s,
tagFiltersToMetricIDsCache: tfssCache,
loopsPerDateTagFilterCache: lrucache.NewCache(getTagFiltersLoopsCacheSize),
loopsPerDateTagFilterCache: lrucache.NewCache(getTagFiltersLoopsCacheSize, 1*time.Hour),
metricIDCache: newMetricIDCache(),
dateMetricIDCache: newDateMetricIDCache(),
}

View File

@@ -8,6 +8,7 @@ import (
"sort"
"strings"
"sync"
"time"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/atomicutil"
@@ -866,7 +867,7 @@ var (
)
var (
regexpCache = lrucache.NewCache(getMaxRegexpCacheSize)
regexpCache = lrucache.NewCache(getMaxRegexpCacheSize, 1*time.Hour)
)
type regexpCacheValue struct {
@@ -921,7 +922,7 @@ var (
)
var (
prefixesCache = lrucache.NewCache(getMaxPrefixesCacheSize)
prefixesCache = lrucache.NewCache(getMaxPrefixesCacheSize, 1*time.Hour)
)
// RegexpPrefixesCacheSize returns the number of cached regexp prefixes for tag filters.