app/vmagent: fix rare hash collision in getLabelsHash (#10937)

Add '=' separator between label name and value when computing the hash
to prevent false collisions, like {a="bc"} and {ab="c"} hashing to the
same value.

getLabelsHashForShard is added to avoid sharding disruptions in vmagent
(-remoteWrite.shardByURL=true mode). The function preserves previous
behavior, without '=' between name and value.

PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10937
This commit is contained in:
Max Kotliar
2026-05-12 15:42:55 +03:00
committed by GitHub
parent 85e0253569
commit 243037823a
5 changed files with 27 additions and 6 deletions

View File

@@ -699,7 +699,7 @@ func shardAmountRemoteWriteCtx(tssBlock []prompb.TimeSeries, shards [][]prompb.T
}
tmpLabels.Labels = hashLabels
}
h := getLabelsHash(hashLabels)
h := getLabelsHashForShard(hashLabels)
// Get the rwctxIdx through consistent hashing and then map it to the index in shards.
// The rwctxIdx is not always equal to the shardIdx, for example, when some rwctx are not available.
@@ -790,11 +790,28 @@ var (
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
)
// getLabelsHashForShard is a separate function from getLabelsHash because
// it omits the '=' separator between label name and value for backward compatibility.
// Changing it would re-shard all series across remoteWrite targets.
func getLabelsHashForShard(labels []prompb.Label) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
b = append(b, label.Name...)
b = append(b, label.Value...)
}
h := xxhash.Sum64(b)
bb.B = b
labelsHashBufPool.Put(bb)
return h
}
func getLabelsHash(labels []prompb.Label) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
b = append(b, label.Name...)
b = append(b, '=')
b = append(b, label.Value...)
}
h := xxhash.Sum64(b)

View File

@@ -25,7 +25,7 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
t.Helper()
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
itemsCount := 1_000 * bucketsCount
itemsCount := 10_000 * bucketsCount
m := make([]int, bucketsCount)
var labels []prompb.Label
for i := range itemsCount {
@@ -44,10 +44,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
}
// Verify that the distribution is even
expectedItemsPerBucket := itemsCount / bucketsCount
expectedItemsPerBucket := float64(itemsCount / bucketsCount)
allowedDeviation := math.Round(float64(expectedItemsPerBucket) * 0.04)
for _, n := range m {
if math.Abs(1-float64(n)/float64(expectedItemsPerBucket)) > 0.04 {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want around %d", bucketsCount, n, expectedItemsPerBucket)
if math.Abs(expectedItemsPerBucket-float64(n)) > allowedDeviation {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want in range [%.0f, %.0f]",
bucketsCount, n, expectedItemsPerBucket-allowedDeviation, expectedItemsPerBucket+allowedDeviation)
}
}
}