lib/storage: prefer numerical values over stale markers when samples share the same timestamp during deduplication

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196

Prefer the non StaleNaN  value when both StaleNaN and non-StaleNaN samples share the timestamp during deduplication(downsampling). The scenario can occur when:
1. Multiple vmagent instances scrape the same target(without -promscrape.cluster.name flag), one instance fails to scrape due to issues such as network, while others succeed.
2. Multiple vmalert instances evaluate the same recording rule, with one instance receiving a partial response while others receive a complete response.

In both cases, since the samples share the same timestamp and represent the metric state at that moment,
the non-StaleNaN value is entirely valid, whereas the StaleNaN could be caused by other unknown issues.
Therefore, it is reasonable to prioritize the non-StaleNaN value.
This commit is contained in:
Haley Wang
2026-01-12 21:10:21 +08:00
committed by Max Kotliar
parent 8d18bc288f
commit 7bd5d19f62
5 changed files with 50 additions and 47 deletions

View File

@@ -193,9 +193,10 @@ func testDeduplication(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier,
}},
{Metric: map[string]string{"__name__": "metric4"}, Samples: []*apptest.Sample{
// If multiple raw samples have the same timestamp on the
// given -dedup.minScrapeInterval discrete interval, then
// stale markers are preferred over any other value.
{Timestamp: ts10, Value: decimal.StaleNaN},
// given -dedup.minScrapeInterval discrete interval,
// always prefer a non-decimal.StaleNaN value,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
{Timestamp: ts10, Value: 50},
}},
},
},

View File

@@ -1345,7 +1345,7 @@ This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/
If multiple raw samples have **the same timestamp** on the given `-dedup.minScrapeInterval` discrete interval,
then the sample with **the biggest value** is kept.
[Stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) are preferred over any other value.
Numerical values are preferred over [stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers).
[Prometheus staleness markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) are processed as any other value during de-duplication.
If raw sample with the biggest timestamp on `-dedup.minScrapeInterval` contains a stale marker, then it is kept after the deduplication.

View File

@@ -43,6 +43,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): reduce default value for `storage.vminsertConnsShutdownDuration` flag from `25s` to `10s` seconds. It reduces probability of ungraceful storage shutdown at Kubernetes based environments, which has 30 seconds default graceful termination period value. See [#10273](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10273)
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): remove legacy `tenantID` query param and use the URL path as the single source of truth for multitenancy. See [#10232](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10232).
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix heatmap rendering issues where charts could break or appear empty when bucket values were uniform or sparse. See [#10240](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10240).
* BUGFIX: all VictoriaMetrics components: prefer numerical values over [stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) when samples share the same timestamp during deduplication. See [#10196](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196#issuecomment-3738433938).
## [v1.133.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.133.0)

View File

@@ -47,13 +47,16 @@ func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterva
j := i
tsPrev := srcTimestamps[j]
vPrev := srcValues[j]
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
for j > 0 && srcTimestamps[j-1] == tsPrev {
j--
if decimal.IsStaleNaN(srcValues[j]) {
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
continue
}
if decimal.IsStaleNaN(vPrev) {
vPrev = srcValues[j]
break
continue
}
if srcValues[j] > vPrev {
vPrev = srcValues[j]
@@ -70,14 +73,16 @@ func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterva
j := len(srcTimestamps) - 1
tsPrev := srcTimestamps[j]
vPrev := srcValues[j]
// Invariant: vPrev > srcValues[j]
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
for j > 0 && srcTimestamps[j-1] == tsPrev {
j--
if decimal.IsStaleNaN(srcValues[j]) {
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
continue
}
if decimal.IsStaleNaN(vPrev) {
vPrev = srcValues[j]
break
continue
}
if srcValues[j] > vPrev {
vPrev = srcValues[j]
@@ -106,13 +111,16 @@ func deduplicateSamplesDuringMerge(srcTimestamps, srcValues []int64, dedupInterv
j := i
tsPrev := srcTimestamps[j]
vPrev := srcValues[j]
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
for j > 0 && srcTimestamps[j-1] == tsPrev {
j--
if decimal.IsStaleNaNInt64(srcValues[j]) {
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
continue
}
if decimal.IsStaleNaNInt64(vPrev) {
vPrev = srcValues[j]
break
continue
}
if srcValues[j] > vPrev {
vPrev = srcValues[j]
@@ -129,19 +137,16 @@ func deduplicateSamplesDuringMerge(srcTimestamps, srcValues []int64, dedupInterv
j := len(srcTimestamps) - 1
tsPrev := srcTimestamps[j]
vPrev := srcValues[j]
if decimal.IsStaleNaNInt64(vPrev) {
// fast path - decimal.StaleNaN is always preferred to other values on interval
dstTimestamps = append(dstTimestamps, tsPrev)
dstValues = append(dstValues, vPrev)
return dstTimestamps, dstValues
}
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
for j > 0 && srcTimestamps[j-1] == tsPrev {
j--
if decimal.IsStaleNaNInt64(srcValues[j]) {
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
continue
}
if decimal.IsStaleNaNInt64(vPrev) {
vPrev = srcValues[j]
break
continue
}
if srcValues[j] > vPrev {
vPrev = srcValues[j]

View File

@@ -81,19 +81,17 @@ func TestDeduplicateSamplesWithIdenticalTimestamps(t *testing.T) {
f(time.Second, []int64{1001, 1001}, []float64{2, 1}, []int64{1001}, []float64{2})
f(time.Second, []int64{1000, 1001, 1001, 1001, 2001}, []float64{1, 2, 5, 3, 0}, []int64{1000, 1001, 2001}, []float64{1, 5, 0})
// position of decimal.StaleNaN shouldn't matter during deduplication
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
f(time.Second, []int64{1000, 1000}, []float64{2, decimal.StaleNaN}, []int64{1000}, []float64{decimal.StaleNaN})
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, 2}, []int64{1000}, []float64{decimal.StaleNaN})
f(time.Second, []int64{1000, 1000, 1000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000}, []float64{decimal.StaleNaN})
// verify decimal.StaleNaN is NOT preferred on timestamp conflicts
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
f(time.Second, []int64{1000, 1000}, []float64{2, decimal.StaleNaN}, []int64{1000}, []float64{2})
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, 2}, []int64{1000}, []float64{2})
f(time.Second, []int64{1000, 1000, 1000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000}, []float64{2})
// compare with Inf values
f(time.Second, []int64{1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN}, []int64{1000}, []float64{decimal.StaleNaN})
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, math.Inf(1)}, []int64{1000}, []float64{decimal.StaleNaN})
f(time.Second, []int64{1000, 1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN, math.Inf(-1)}, []int64{1000}, []float64{decimal.StaleNaN})
// verify decimal.StaleNaN is preferred only on timestamp conflicts
f(time.Second, []int64{1000, 1000, 2000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 2})
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 6, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
f(time.Second, []int64{1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN}, []int64{1000}, []float64{math.Inf(1)})
f(time.Second, []int64{1000, 1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN, math.Inf(-1)}, []int64{1000}, []float64{math.Inf(1)})
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{1, 3})
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{decimal.StaleNaN, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 6, 2, 3}, []int64{1000, 2000}, []float64{6, 3})
}
func TestDeduplicateSamplesDuringMergeWithIdenticalTimestamps(t *testing.T) {
@@ -124,20 +122,18 @@ func TestDeduplicateSamplesDuringMergeWithIdenticalTimestamps(t *testing.T) {
f(time.Second, []int64{1001, 1001}, []int64{2, 1}, []int64{1001}, []int64{2})
f(time.Second, []int64{1000, 1001, 1001, 1001, 2001}, []int64{1, 2, 5, 3, 0}, []int64{1000, 1001, 2001}, []int64{1, 5, 0})
// verify decimal.StaleNaN is NOT preferred on timestamp conflicts
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
staleNaN, _ := decimal.FromFloat(decimal.StaleNaN)
// position of decimal.StaleNaN shouldn't matter during deduplication
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
f(time.Second, []int64{1000, 1000}, []int64{2, staleNaN}, []int64{1000}, []int64{staleNaN})
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, 2}, []int64{1000}, []int64{staleNaN})
f(time.Second, []int64{1000, 1000, 1000}, []int64{1, staleNaN, 2}, []int64{1000}, []int64{staleNaN})
f(time.Second, []int64{1000, 1000}, []int64{2, staleNaN}, []int64{1000}, []int64{2})
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, 2}, []int64{1000}, []int64{2})
f(time.Second, []int64{1000, 1000, 1000}, []int64{1, staleNaN, 2}, []int64{1000}, []int64{2})
// compare with max values
f(time.Second, []int64{1000, 1000}, []int64{math.MaxInt64, staleNaN}, []int64{1000}, []int64{staleNaN})
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, math.MaxInt64}, []int64{1000}, []int64{staleNaN})
f(time.Second, []int64{1000, 1000, 1000}, []int64{math.MaxInt64, staleNaN, math.MaxInt64}, []int64{1000}, []int64{staleNaN})
// verify decimal.StaleNaN is preferred only on timestamp conflicts
f(time.Second, []int64{1000, 1000, 2000}, []int64{1, staleNaN, 2}, []int64{1000, 2000}, []int64{staleNaN, 2})
f(time.Second, []int64{1000, 1000, 2000, 2000}, []int64{1, staleNaN, 2, 3}, []int64{1000, 2000}, []int64{staleNaN, 3})
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []int64{1, staleNaN, math.MaxInt64, 2, 3}, []int64{1000, 2000}, []int64{staleNaN, 3})
f(time.Second, []int64{1000, 1000}, []int64{math.MaxInt64, staleNaN}, []int64{1000}, []int64{math.MaxInt64})
f(time.Second, []int64{1000, 1000, 1000}, []int64{math.MaxInt64, staleNaN, math.MaxInt64}, []int64{1000}, []int64{math.MaxInt64})
f(time.Second, []int64{1000, 1000, 2000}, []int64{1, staleNaN, 2}, []int64{1000, 2000}, []int64{1, 2})
f(time.Second, []int64{1000, 1000, 2000, 2000}, []int64{1, staleNaN, 2, 3}, []int64{1000, 2000}, []int64{1, 3})
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []int64{1, staleNaN, math.MaxInt64, 2, 3}, []int64{1000, 2000}, []int64{math.MaxInt64, 3})
}
func TestDeduplicateSamples(t *testing.T) {