diff --git a/docs/victoriametrics/changelog/CHANGELOG.md b/docs/victoriametrics/changelog/CHANGELOG.md index c37d30aeb5..853fa590de 100644 --- a/docs/victoriametrics/changelog/CHANGELOG.md +++ b/docs/victoriametrics/changelog/CHANGELOG.md @@ -37,6 +37,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel * FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): calculate the lookbehind window as the median of the intervals between the last 20 raw samples within the requested time range for range queries. Previously, this calculation was based on the first 20 samples, using the last 20 samples should improve accuracy for recent data. See [#10281](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10281). * BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix configuration reloading for `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig` when vmagent is launched with empty files. Previously, if vmagent started with an empty config, subsequent config reloads were ignored. See [#10211](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10211). +* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent slow ingestion requests and CPU usage spikes during midnight daily-index creation. See [#10064](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10064). * BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix a missing path error for `http://:8428/zabbixconnector/api/v1/history`. See [10214](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10214). 
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): reduce default value for `storage.vminsertConnsShutdownDuration` flag from `25s` to `10s` seconds. It reduces probability of ungraceful storage shutdown at Kubernetes based environments, which has 30 seconds default graceful termination period value. See [#10273](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10273) * BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): remove legacy `tenantID` query param and use the URL path as the single source of truth for multitenancy. See [#10232](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10232). diff --git a/lib/storage/date_metric_id_cache.go b/lib/storage/date_metric_id_cache.go index 3a9980eef9..ec73171b47 100644 --- a/lib/storage/date_metric_id_cache.go +++ b/lib/storage/date_metric_id_cache.go @@ -220,7 +220,8 @@ func (dmc *dateMetricIDCache) syncLocked() { } func (dmc *dateMetricIDCache) startRotation() { - d := timeutil.AddJitterToDuration(10 * time.Minute) + // 1 hour was chosen based on https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10064#issuecomment-3749046726 + d := timeutil.AddJitterToDuration(time.Hour) ticker := time.NewTicker(d) defer ticker.Stop() for { diff --git a/lib/storage/storage_synctest_test.go b/lib/storage/storage_synctest_test.go index 0f014d86ff..9b2dbec843 100644 --- a/lib/storage/storage_synctest_test.go +++ b/lib/storage/storage_synctest_test.go @@ -463,6 +463,7 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { MaxTimestamp: time.Now().Add(+15 * time.Minute).UnixMilli(), }) s := MustOpenStorage(t.Name(), OpenOptions{}) + defer s.MustClose() s.AddRows(mrs0, defaultPrecisionBits) s.DebugFlush() if got, want := countMetricIDs(t, s, "metric0", today), numSeries; got != want { @@ -484,12 +485,6 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { t.Fatalf("unexpected 
metric id count for next day: got %d, want %d", got, want) } - // Close the storage and reopen it 15m later instead of keeping it open - // and waiting. This is to make the test faster. Storage has a lot of - // background tasks that are activated every 1-10 seconds and synctest's - // time.Sleep() will wake them up many times. Closing storage before - // sleeping seems to eliminate this. - // // At 23:15 the prefill must work. // // However, the mrs1 timestamps are not within the current hour and @@ -498,9 +493,7 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { // // The mrs2 timestamps are within the current hour so some next day index // entries will be created. - s.MustClose() time.Sleep(15 * time.Minute) // 2000-01-01T23:15:00Z - s = MustOpenStorage(t.Name(), OpenOptions{}) mrs1 := testGenerateMetricRowsWithPrefixForTenantID(rng, accountID, projectID, numSeries, "metric1", TimeRange{ MinTimestamp: time.Now().Add(-30 * time.Minute).UnixMilli(), MaxTimestamp: time.Now().Add(-15 * time.Minute).UnixMilli(), @@ -526,13 +519,7 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { t.Fatalf("unexpected metric id count for next day: got 0, want > 0") } - // Close the storage and reopen it at 23:30. - // - // Since we are now closer to midnight than we were at 23:15, more next - // day entries must be created. - s.MustClose() time.Sleep(15 * time.Minute) // 2000-01-01T23:30:00Z - s = MustOpenStorage(t.Name(), OpenOptions{}) mrs3 := testGenerateMetricRowsWithPrefixForTenantID(rng, accountID, projectID, numSeries, "metric3", TimeRange{ MinTimestamp: time.Now().Add(-15 * time.Minute).UnixMilli(), MaxTimestamp: time.Now().UnixMilli(), @@ -547,13 +534,7 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { t.Fatalf("unexpected metric id count for next day: got %d, want > %d", got30min, got15min) } - // Close the storage and reopen it at 23:45. 
- // - // Since we are now closer to midnight than we were at 23:30, more next - // day entries must be created. - s.MustClose() time.Sleep(15 * time.Minute) // 2000-01-01T23:45:00Z - s = MustOpenStorage(t.Name(), OpenOptions{}) mrs4 := testGenerateMetricRowsWithPrefixForTenantID(rng, accountID, projectID, numSeries, "metric4", TimeRange{ MinTimestamp: time.Now().Add(-15 * time.Minute).UnixMilli(), MaxTimestamp: time.Now().UnixMilli(), @@ -565,7 +546,33 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) { t.Fatalf("unexpected metric id count for next day: got %d, want > %d", got45min, got30min) } - s.MustClose() + // Sleep until the next day. + // Do not close the storage: closing it resets the dateMetricID cache, which would result in slow inserts, + // since the dateMetricID cache is not persisted on disk. + + time.Sleep(35 * time.Minute) // 2000-01-02T00:20:00Z + synctest.Wait() + + // Ingest data for the next day. It must hit the dateMetricID cache and + // must not result in a significant number of slow inserts. + var m Metrics + s.UpdateMetrics(&m) + currDaySlowInserts := m.SlowPerDayIndexInserts + mrs3NextDay := testGenerateMetricRowsWithPrefixForTenantID(rng, accountID, projectID, numSeries, "metric3", TimeRange{ + MinTimestamp: time.Now().Add(-5 * time.Minute).UnixMilli(), + MaxTimestamp: time.Now().UnixMilli(), + }) + + s.AddRows(mrs3NextDay, defaultPrecisionBits) + s.DebugFlush() + m.Reset() + s.UpdateMetrics(&m) + nextDaySlowInserts := m.SlowPerDayIndexInserts + slowInserts := nextDaySlowInserts - currDaySlowInserts + if slowInserts >= numSeries { + t.Errorf("unexpected amount of slow inserts: got %d, want < %d", slowInserts, numSeries) + } + }) }