mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-17 00:26:36 +03:00
lib/storage: prefer numerical values over stale markers when samples share the same timestamp during deduplication
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196 Prefer the non StaleNaN value when both StaleNaN and non-StaleNaN samples share the timestamp during deduplication(downsampling). The scenario can occur when: 1. Multiple vmagent instances scrape the same target(without -promscrape.cluster.name flag), one instance fails to scrape due to issues such as network, while others succeed. 2. Multiple vmalert instances evaluate the same recording rule, with one instance receiving a partial response while others receive a complete response. In both cases, since the samples share the same timestamp and represent the metric state at that moment, the non-StaleNaN value is entirely valid, whereas the StaleNaN could be caused by other unknown issues. Therefore, it is reasonable to prioritize the non-StaleNaN value.
This commit is contained in:
@@ -193,9 +193,10 @@ func testDeduplication(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier,
|
||||
}},
|
||||
{Metric: map[string]string{"__name__": "metric4"}, Samples: []*apptest.Sample{
|
||||
// If multiple raw samples have the same timestamp on the
|
||||
// given -dedup.minScrapeInterval discrete interval, then
|
||||
// stale markers are preferred over any other value.
|
||||
{Timestamp: ts10, Value: decimal.StaleNaN},
|
||||
// given -dedup.minScrapeInterval discrete interval,
|
||||
// always prefer a non-decimal.StaleNaN value,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
{Timestamp: ts10, Value: 50},
|
||||
}},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1345,7 +1345,7 @@ This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/
|
||||
|
||||
If multiple raw samples have **the same timestamp** on the given `-dedup.minScrapeInterval` discrete interval,
|
||||
then the sample with **the biggest value** is kept.
|
||||
[Stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) are preferred over any other value.
|
||||
Numerical values are preferred over [stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers).
|
||||
|
||||
[Prometheus staleness markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) are processed as any other value during de-duplication.
|
||||
If raw sample with the biggest timestamp on `-dedup.minScrapeInterval` contains a stale marker, then it is kept after the deduplication.
|
||||
|
||||
@@ -43,6 +43,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): reduce default value for `storage.vminsertConnsShutdownDuration` flag from `25s` to `10s` seconds. It reduces probability of ungraceful storage shutdown at Kubernetes based environments, which has 30 seconds default graceful termination period value. See [#10273](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10273)
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): remove legacy `tenantID` query param and use the URL path as the single source of truth for multitenancy. See [#10232](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10232).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix heatmap rendering issues where charts could break or appear empty when bucket values were uniform or sparse. See [#10240](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10240).
|
||||
* BUGFIX: all VictoriaMetrics components: prefer numerical values over [stale markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) when samples share the same timestamp during deduplication. See [#10196](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196#issuecomment-3738433938).
|
||||
|
||||
## [v1.133.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.133.0)
|
||||
|
||||
|
||||
@@ -47,13 +47,16 @@ func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterva
|
||||
j := i
|
||||
tsPrev := srcTimestamps[j]
|
||||
vPrev := srcValues[j]
|
||||
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
|
||||
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
for j > 0 && srcTimestamps[j-1] == tsPrev {
|
||||
j--
|
||||
if decimal.IsStaleNaN(srcValues[j]) {
|
||||
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
continue
|
||||
}
|
||||
if decimal.IsStaleNaN(vPrev) {
|
||||
vPrev = srcValues[j]
|
||||
break
|
||||
continue
|
||||
}
|
||||
if srcValues[j] > vPrev {
|
||||
vPrev = srcValues[j]
|
||||
@@ -70,14 +73,16 @@ func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterva
|
||||
j := len(srcTimestamps) - 1
|
||||
tsPrev := srcTimestamps[j]
|
||||
vPrev := srcValues[j]
|
||||
// Invariant: vPrev > srcValues[j]
|
||||
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
|
||||
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
for j > 0 && srcTimestamps[j-1] == tsPrev {
|
||||
j--
|
||||
if decimal.IsStaleNaN(srcValues[j]) {
|
||||
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
continue
|
||||
}
|
||||
if decimal.IsStaleNaN(vPrev) {
|
||||
vPrev = srcValues[j]
|
||||
break
|
||||
continue
|
||||
}
|
||||
if srcValues[j] > vPrev {
|
||||
vPrev = srcValues[j]
|
||||
@@ -106,13 +111,16 @@ func deduplicateSamplesDuringMerge(srcTimestamps, srcValues []int64, dedupInterv
|
||||
j := i
|
||||
tsPrev := srcTimestamps[j]
|
||||
vPrev := srcValues[j]
|
||||
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
|
||||
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
for j > 0 && srcTimestamps[j-1] == tsPrev {
|
||||
j--
|
||||
if decimal.IsStaleNaNInt64(srcValues[j]) {
|
||||
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
continue
|
||||
}
|
||||
if decimal.IsStaleNaNInt64(vPrev) {
|
||||
vPrev = srcValues[j]
|
||||
break
|
||||
continue
|
||||
}
|
||||
if srcValues[j] > vPrev {
|
||||
vPrev = srcValues[j]
|
||||
@@ -129,19 +137,16 @@ func deduplicateSamplesDuringMerge(srcTimestamps, srcValues []int64, dedupInterv
|
||||
j := len(srcTimestamps) - 1
|
||||
tsPrev := srcTimestamps[j]
|
||||
vPrev := srcValues[j]
|
||||
if decimal.IsStaleNaNInt64(vPrev) {
|
||||
// fast path - decimal.StaleNaN is always preferred to other values on interval
|
||||
dstTimestamps = append(dstTimestamps, tsPrev)
|
||||
dstValues = append(dstValues, vPrev)
|
||||
return dstTimestamps, dstValues
|
||||
}
|
||||
// if multiple samples have the same timestamp, choose the maximum value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333;
|
||||
// always prefer a non-decimal.StaleNaN value, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
for j > 0 && srcTimestamps[j-1] == tsPrev {
|
||||
j--
|
||||
if decimal.IsStaleNaNInt64(srcValues[j]) {
|
||||
// always prefer decimal.IsStaleNaN to avoid inconsistency when comparing values
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
continue
|
||||
}
|
||||
if decimal.IsStaleNaNInt64(vPrev) {
|
||||
vPrev = srcValues[j]
|
||||
break
|
||||
continue
|
||||
}
|
||||
if srcValues[j] > vPrev {
|
||||
vPrev = srcValues[j]
|
||||
|
||||
@@ -81,19 +81,17 @@ func TestDeduplicateSamplesWithIdenticalTimestamps(t *testing.T) {
|
||||
f(time.Second, []int64{1001, 1001}, []float64{2, 1}, []int64{1001}, []float64{2})
|
||||
f(time.Second, []int64{1000, 1001, 1001, 1001, 2001}, []float64{1, 2, 5, 3, 0}, []int64{1000, 1001, 2001}, []float64{1, 5, 0})
|
||||
|
||||
// position of decimal.StaleNaN shouldn't matter during deduplication
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
f(time.Second, []int64{1000, 1000}, []float64{2, decimal.StaleNaN}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, 2}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
// verify decimal.StaleNaN is NOT preferred on timestamp conflicts
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
f(time.Second, []int64{1000, 1000}, []float64{2, decimal.StaleNaN}, []int64{1000}, []float64{2})
|
||||
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, 2}, []int64{1000}, []float64{2})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000}, []float64{2})
|
||||
// compare with Inf values
|
||||
f(time.Second, []int64{1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
f(time.Second, []int64{1000, 1000}, []float64{decimal.StaleNaN, math.Inf(1)}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN, math.Inf(-1)}, []int64{1000}, []float64{decimal.StaleNaN})
|
||||
// verify decimal.StaleNaN is preferred only on timestamp conflicts
|
||||
f(time.Second, []int64{1000, 1000, 2000}, []float64{1, decimal.StaleNaN, 2}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 2})
|
||||
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
|
||||
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 6, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
|
||||
f(time.Second, []int64{1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN}, []int64{1000}, []float64{math.Inf(1)})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []float64{math.Inf(1), decimal.StaleNaN, math.Inf(-1)}, []int64{1000}, []float64{math.Inf(1)})
|
||||
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{1, 3})
|
||||
f(time.Second, []int64{1000, 1000, 2000, 2000}, []float64{decimal.StaleNaN, decimal.StaleNaN, 2, 3}, []int64{1000, 2000}, []float64{decimal.StaleNaN, 3})
|
||||
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []float64{1, decimal.StaleNaN, 6, 2, 3}, []int64{1000, 2000}, []float64{6, 3})
|
||||
}
|
||||
|
||||
func TestDeduplicateSamplesDuringMergeWithIdenticalTimestamps(t *testing.T) {
|
||||
@@ -124,20 +122,18 @@ func TestDeduplicateSamplesDuringMergeWithIdenticalTimestamps(t *testing.T) {
|
||||
f(time.Second, []int64{1001, 1001}, []int64{2, 1}, []int64{1001}, []int64{2})
|
||||
f(time.Second, []int64{1000, 1001, 1001, 1001, 2001}, []int64{1, 2, 5, 3, 0}, []int64{1000, 1001, 2001}, []int64{1, 5, 0})
|
||||
|
||||
// verify decimal.StaleNaN is NOT preferred on timestamp conflicts
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10196
|
||||
staleNaN, _ := decimal.FromFloat(decimal.StaleNaN)
|
||||
// position of decimal.StaleNaN shouldn't matter during deduplication
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7674
|
||||
f(time.Second, []int64{1000, 1000}, []int64{2, staleNaN}, []int64{1000}, []int64{staleNaN})
|
||||
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, 2}, []int64{1000}, []int64{staleNaN})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []int64{1, staleNaN, 2}, []int64{1000}, []int64{staleNaN})
|
||||
f(time.Second, []int64{1000, 1000}, []int64{2, staleNaN}, []int64{1000}, []int64{2})
|
||||
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, 2}, []int64{1000}, []int64{2})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []int64{1, staleNaN, 2}, []int64{1000}, []int64{2})
|
||||
// compare with max values
|
||||
f(time.Second, []int64{1000, 1000}, []int64{math.MaxInt64, staleNaN}, []int64{1000}, []int64{staleNaN})
|
||||
f(time.Second, []int64{1000, 1000}, []int64{staleNaN, math.MaxInt64}, []int64{1000}, []int64{staleNaN})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []int64{math.MaxInt64, staleNaN, math.MaxInt64}, []int64{1000}, []int64{staleNaN})
|
||||
// verify decimal.StaleNaN is preferred only on timestamp conflicts
|
||||
f(time.Second, []int64{1000, 1000, 2000}, []int64{1, staleNaN, 2}, []int64{1000, 2000}, []int64{staleNaN, 2})
|
||||
f(time.Second, []int64{1000, 1000, 2000, 2000}, []int64{1, staleNaN, 2, 3}, []int64{1000, 2000}, []int64{staleNaN, 3})
|
||||
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []int64{1, staleNaN, math.MaxInt64, 2, 3}, []int64{1000, 2000}, []int64{staleNaN, 3})
|
||||
f(time.Second, []int64{1000, 1000}, []int64{math.MaxInt64, staleNaN}, []int64{1000}, []int64{math.MaxInt64})
|
||||
f(time.Second, []int64{1000, 1000, 1000}, []int64{math.MaxInt64, staleNaN, math.MaxInt64}, []int64{1000}, []int64{math.MaxInt64})
|
||||
f(time.Second, []int64{1000, 1000, 2000}, []int64{1, staleNaN, 2}, []int64{1000, 2000}, []int64{1, 2})
|
||||
f(time.Second, []int64{1000, 1000, 2000, 2000}, []int64{1, staleNaN, 2, 3}, []int64{1000, 2000}, []int64{1, 3})
|
||||
f(time.Second, []int64{1000, 1000, 1000, 2000, 2000}, []int64{1, staleNaN, math.MaxInt64, 2, 3}, []int64{1000, 2000}, []int64{math.MaxInt64, 3})
|
||||
}
|
||||
|
||||
func TestDeduplicateSamples(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user