Compare commits

...

8 Commits

Author SHA1 Message Date
Jayice
27f3c6ba45 use pooled labels array 2026-06-24 16:39:50 +08:00
Jayice
c558291847 update CHANGELOG.md 2026-04-17 14:34:07 +08:00
Jayice
9bd219fdc7 address review 2026-04-17 14:31:52 +08:00
Jayice
ec9d37ce36 improve code style 2026-04-17 13:55:33 +08:00
Jayice
607630b9f5 add unit test 2026-04-17 13:52:07 +08:00
Jayice
f4df18d2db add documentation for obfuscation 2026-04-17 13:03:25 +08:00
Jayice
29bc38871d address review 2026-04-16 15:49:04 +08:00
Jayice
3f35399c24 support obfuscation for rw 2026-04-15 15:09:40 +08:00
5 changed files with 255 additions and 3 deletions

View File

@@ -0,0 +1,84 @@
package remotewrite
import (
"crypto/sha256"
"encoding/hex"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
// obfuscationCtx carries the scratch state passed to remoteWriteCtx.applyObfuscation.
//
// Instances are obtained from and returned to obfuscationCtxPool.
type obfuscationCtx struct {
// labels is the buffer applyObfuscation appends copied labels to
// before rewriting their values.
labels []prompb.Label
}
// Reset prepares ctx for reuse.
//
// The currently used part of the labels buffer is passed to promrelabel.CleanLabels
// (presumably to drop references to label strings - confirm against promrelabel),
// and then the buffer is truncated to zero length while keeping its capacity.
func (ctx *obfuscationCtx) Reset() {
	used := ctx.labels
	promrelabel.CleanLabels(used)
	ctx.labels = used[:0]
}
// obfuscationCtxPool hands out obfuscationCtx values, creating an empty one
// when the pool has nothing to reuse.
var obfuscationCtxPool = &sync.Pool{
	New: func() any {
		return new(obfuscationCtx)
	},
}
// initObfuscationConfig fills rwctx.obfuscationLabels with the label names configured
// via -remoteWrite.obfuscationLabels for the remote write url at rwctx.idx.
//
// The per-url value returned by obfuscationLabels.GetOptionalArg is a list of label
// names separated by "^^". Empty names are skipped, so an empty per-url value leaves
// rwctx.obfuscationLabels nil and obfuscation stays disabled for this url.
func (rwctx *remoteWriteCtx) initObfuscationConfig() {
	if len(*obfuscationLabels) == 0 {
		return
	}

	names := strings.Split(obfuscationLabels.GetOptionalArg(rwctx.idx), "^^")
	labels := make(map[string]struct{}, len(names))
	for _, name := range names {
		if name == "" {
			// strings.Split returns [""] for an empty value. Registering the empty
			// name would make len(rwctx.obfuscationLabels) non-zero and force callers
			// to copy and scan every pushed block without obfuscating anything.
			continue
		}
		labels[name] = struct{}{}
	}
	if len(labels) == 0 {
		return
	}
	rwctx.obfuscationLabels = labels
}
// applyObfuscation replaces the values of labels listed in rwctx.obfuscationLabels
// with the hex-encoded SHA-256 digest of the original value.
//
// The label slices referenced by the incoming tss are not modified. Labels of every
// series containing at least one label to obfuscate are copied into ctx.labels,
// and the series is re-pointed at that copy. Series without such labels are left as is.
//
// The items of tss are updated in place and tss itself is returned. The returned series
// reference ctx.labels, so they are valid only until ctx.Reset is called or ctx is
// passed to another applyObfuscation call.
//
// NOTE(review): the digest is unsalted, so low-entropy values (e.g. IPv4 addresses)
// can be recovered by enumeration - confirm this matches the intended threat model.
func (rwctx *remoteWriteCtx) applyObfuscation(tss []prompb.TimeSeries, ctx *obfuscationCtx) []prompb.TimeSeries {
	if len(rwctx.obfuscationLabels) == 0 || len(tss) == 0 {
		return tss
	}

	// Digests are cached by the original label value for the duration of the call,
	// so a value repeated across series is hashed only once.
	digestByValue := make(map[string]string)
	poolLabels := ctx.labels[:0]
	for i := range tss {
		ts := &tss[i]
		src := ts.Labels

		// Locate the first label to obfuscate. Everything before it can be skipped
		// by the rewrite loop below.
		first := -1
		for j := range src {
			if _, ok := rwctx.obfuscationLabels[src[j].Name]; ok {
				first = j
				break
			}
		}
		if first < 0 {
			continue
		}

		// Copy the labels to the tail of the pooled buffer and advance the buffer,
		// so the next series is appended after this one instead of over it.
		poolLabelsLen := len(poolLabels)
		poolLabels = append(poolLabels, src...)
		// Cap the capacity, so that appending to ts.Labels later cannot spill
		// into labels of the following series stored in the same buffer.
		dst := poolLabels[poolLabelsLen:len(poolLabels):len(poolLabels)]
		ts.Labels = dst

		for j := first; j < len(dst); j++ {
			label := &dst[j]
			if _, ok := rwctx.obfuscationLabels[label.Name]; !ok {
				continue
			}
			digest, ok := digestByValue[label.Value]
			if !ok {
				sum := sha256.Sum256([]byte(label.Value))
				digest = hex.EncodeToString(sum[:])
				digestByValue[label.Value] = digest
			}
			label.Value = digest
		}
	}

	// Store the (possibly grown) buffer back, so ctx.Reset can clean it
	// and its capacity is reused by the next call.
	ctx.labels = poolLabels
	return tss
}

View File

@@ -102,6 +102,9 @@ var (
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
"By default, metadata sending is controlled by the global -enableMetadata flag")
// obfuscationLabels holds per-url lists of label names whose values must be obfuscated.
//
// The separator is wrapped in single quotes in the description, because the flag package
// treats the first back-quoted item in a usage string as the name of the flag value
// and strips the back quotes when printing help.
obfuscationLabels = flagutil.NewArrayString("remoteWrite.obfuscationLabels", "List of label names whose values must be obfuscated before sending to the corresponding -remoteWrite.url. "+
	"Multiple label names should be separated by '^^', e.g. \"job^^instance,ip\". By default, label obfuscation is disabled")
)
var (
@@ -833,6 +836,8 @@ type remoteWriteCtx struct {
pss []*pendingSeries
pssNextIdx atomic.Uint64
obfuscationLabels map[string]struct{}
rowsPushedAfterRelabel *metrics.Counter
rowsDroppedByRelabel *metrics.Counter
@@ -937,6 +942,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
rowsDroppedOnPushFailure: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_samples_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
}
rwctx.initStreamAggrConfig()
rwctx.initObfuscationConfig()
return rwctx
}
@@ -1102,6 +1108,7 @@ func (rwctx *remoteWriteCtx) tryPushMetadataInternal(mms []prompb.MetricMetadata
func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries) bool {
var rctx *relabelCtx
var v *[]prompb.TimeSeries
var octx *obfuscationCtx
defer func() {
if rctx == nil {
return
@@ -1120,6 +1127,24 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
rctx.appendExtraLabels(tss, labelsGlobal)
}
if len(rwctx.obfuscationLabels) != 0 {
if rctx == nil {
shadowTss := tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*shadowTss, tss...)
defer func() {
*shadowTss = prompb.ResetTimeSeries(tss)
tssPool.Put(shadowTss)
}()
}
octx = obfuscationCtxPool.Get().(*obfuscationCtx)
defer func() {
octx.Reset()
obfuscationCtxPool.Put(octx)
}()
tss = rwctx.applyObfuscation(tss, octx)
}
pss := rwctx.pss
idx := rwctx.pssNextIdx.Add(1) % uint64(len(pss))

View File

@@ -1,6 +1,8 @@
package remotewrite
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"math"
"reflect"
@@ -374,3 +376,124 @@ func TestCalculateHealthyRwctxIdx(t *testing.T) {
f(1, []int{0}, nil)
f(1, []int{}, []int{0})
}
// TestRemoteWriteObfuscation checks that applyObfuscation replaces the values of the
// labels configured via -remoteWrite.obfuscationLabels with their hex-encoded SHA-256
// digests, while leaving other labels and series untouched.
func TestRemoteWriteObfuscation(t *testing.T) {
	// digest returns the expected obfuscated form of v.
	digest := func(v string) string {
		sum := sha256.Sum256([]byte(v))
		return hex.EncodeToString(sum[:])
	}
	// lbl builds a single label.
	lbl := func(name, value string) prompb.Label {
		return prompb.Label{Name: name, Value: value}
	}
	// series builds a time series with the given labels and one fixed sample.
	series := func(labels ...prompb.Label) prompb.TimeSeries {
		return prompb.TimeSeries{
			Labels:  labels,
			Samples: []prompb.Sample{{Value: 1, Timestamp: 0}},
		}
	}

	f := func(obfuscationLabelList string, inputTss []prompb.TimeSeries, expectedTss []prompb.TimeSeries) {
		t.Helper()

		defer metrics.UnregisterAllMetrics()

		// Override the flag value for the duration of this case only.
		savedFlagValue := *obfuscationLabels
		defer func() {
			*obfuscationLabels = savedFlagValue
		}()
		*obfuscationLabels = []string{obfuscationLabelList}

		rwctx := &remoteWriteCtx{idx: 0}
		rwctx.initObfuscationConfig()

		var octx obfuscationCtx
		outputTss := rwctx.applyObfuscation(inputTss, &octx)
		if !reflect.DeepEqual(expectedTss, outputTss) {
			t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", outputTss, expectedTss)
		}
	}

	// 1. no label names are configured, so nothing is obfuscated.
	f("",
		[]prompb.TimeSeries{
			series(lbl("ip", "123"), lbl("instance", "1234")),
		},
		[]prompb.TimeSeries{
			series(lbl("ip", "123"), lbl("instance", "1234")),
		},
	)

	// 2. only the value of the "ip" label is obfuscated.
	f("ip",
		[]prompb.TimeSeries{
			series(lbl("ip", "123"), lbl("instance", "1234")),
		},
		[]prompb.TimeSeries{
			series(lbl("ip", digest("123")), lbl("instance", "1234")),
		},
	)

	// 3. the values of "ip" and "instance" are obfuscated; a series without them stays as is.
	f("ip^^instance",
		[]prompb.TimeSeries{
			series(lbl("ip", "123"), lbl("instance", "1234")),
			series(lbl("job", "123")),
		},
		[]prompb.TimeSeries{
			series(lbl("ip", digest("123")), lbl("instance", digest("1234"))),
			series(lbl("job", "123")),
		},
	)
}

View File

@@ -25,9 +25,9 @@ The sandbox cluster installation runs under the constant load generated by
See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
## tip
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add label value obfuscation for remote write. When `-remoteWrite.obfuscationLabels` is set, the values of the listed labels are replaced with their SHA-256 hashes before they are sent to the corresponding `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#obfuscating-label-values) and [#10599](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10599).
## [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0)
Released at 2026-04-10
**Update Note 1:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): [CSV export](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-csv-data) (`/api/v1/export/csv`) now adds a header row as the first line of the response, so existing CSV-processing scripts may need to skip this header. See [#10666](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10666).

View File

@@ -289,14 +289,16 @@ flowchart TB
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#obfuscating-label-values">obfuscate labels</a><br><b>-remoteWrite.obfuscationLabels</b>]
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
%% Right branch
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#obfuscating-label-values">obfuscate labels</a><br><b>-remoteWrite.obfuscationLabels</b>]
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
```
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
@@ -529,6 +531,24 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
/path/to/vmagent -remoteWrite.url=http://127.0.0.1:8428/api/v1/write?extra_label="env=prod"
```
## Obfuscating label values
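`vmagent` can anonymize the values of specific labels before sending metrics to `-remoteWrite.url`. This is enabled by setting `-remoteWrite.obfuscationLabels`: the value of every listed label is replaced with the hex-encoded SHA-256 hash of the original value.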
Some `-remoteWrite.url` targets may point to external services, such as a monitoring vendor outside the department or company. Obfuscating sensitive label values
(e.g. ip, host, datacenter) before sending them to such services helps meet security and compliance requirements.
Pass `-remoteWrite.obfuscationLabels` for each `-remoteWrite.url` to specify the labels to obfuscate for that url. Separate multiple label names with `^^`, and pass an empty value to leave obfuscation disabled for a url:
```sh
./vmagent \
-remoteWrite.url=http://<external-service1> \
-remoteWrite.obfuscationLabels='instance^^datacenter' \
-remoteWrite.url=http://<external-service2> \
-remoteWrite.obfuscationLabels='instance' \
-remoteWrite.url=http://<internal-service> \
-remoteWrite.obfuscationLabels=''
```
## Automatically generated metrics
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format)