Compare commits

...

7 Commits

Author SHA1 Message Date
Jayice
c558291847 update CHANGELOG.md 2026-04-17 14:34:07 +08:00
Jayice
9bd219fdc7 address review 2026-04-17 14:31:52 +08:00
Jayice
ec9d37ce36 improve code style 2026-04-17 13:55:33 +08:00
Jayice
607630b9f5 add unit test 2026-04-17 13:52:07 +08:00
Jayice
f4df18d2db add documentation for obfuscation 2026-04-17 13:03:25 +08:00
Jayice
29bc38871d address review 2026-04-16 15:49:04 +08:00
Jayice
3f35399c24 support obfuscation for rw 2026-04-15 15:09:40 +08:00
5 changed files with 174 additions and 3 deletions

View File

@@ -0,0 +1,49 @@
package remotewrite
import (
"crypto/sha256"
"encoding/hex"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// initObfuscationConfig builds the set of label names whose values must be
// obfuscated for this remote write context.
//
// The -remoteWrite.obfuscationLabels value selected by rwctx.idx is split on
// "^^" and every non-empty name is stored in rwctx.obfuscationLabels.
// It is a no-op when the flag isn't set at all.
func (rwctx *remoteWriteCtx) initObfuscationConfig() {
	if len(*obfuscationLabels) == 0 {
		return
	}

	rwObfuscationLabels := obfuscationLabels.GetOptionalArg(rwctx.idx)
	labels := make(map[string]struct{})
	for _, label := range strings.Split(rwObfuscationLabels, "^^") {
		// strings.Split returns a single empty string for an empty input and
		// empty fragments for leading, trailing or doubled separators.
		// Skip them, so the set stays empty when no label names are given;
		// otherwise the len(rwctx.obfuscationLabels) checks would pass
		// and time series would be processed for nothing.
		if label == "" {
			continue
		}
		labels[label] = struct{}{}
	}
	rwctx.obfuscationLabels = labels
}
// applyObfuscation replaces, in place, the values of labels listed in
// rwctx.obfuscationLabels with the hex-encoded SHA-256 hash of the original value.
//
// The passed tss slice is modified and returned. It is returned untouched
// when either tss or rwctx.obfuscationLabels is empty.
func (rwctx *remoteWriteCtx) applyObfuscation(tss []prompb.TimeSeries) []prompb.TimeSeries {
	if len(tss) == 0 || len(rwctx.obfuscationLabels) == 0 {
		return tss
	}

	// hashed maps an original label value to its hex digest, so every
	// distinct value is hashed at most once per call.
	hashed := make(map[string]string)
	for i := range tss {
		for j := range tss[i].Labels {
			lbl := &tss[i].Labels[j]
			if _, found := rwctx.obfuscationLabels[lbl.Name]; found {
				digest, seen := hashed[lbl.Value]
				if !seen {
					sum := sha256.Sum256([]byte(lbl.Value))
					digest = hex.EncodeToString(sum[:])
					hashed[lbl.Value] = digest
				}
				lbl.Value = digest
			}
		}
	}
	return tss
}

View File

@@ -102,6 +102,9 @@ var (
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
"By default, metadata sending is controlled by the global -enableMetadata flag")
// obfuscationLabels holds per-url lists of label names whose values are obfuscated before sending.
obfuscationLabels = flagutil.NewArrayString("remoteWrite.obfuscationLabels", "List of label names whose values must be obfuscated before sending to the corresponding -remoteWrite.url. "+
	"By default, label obfuscation is disabled")
)
var (
@@ -833,6 +836,8 @@ type remoteWriteCtx struct {
pss []*pendingSeries
pssNextIdx atomic.Uint64
obfuscationLabels map[string]struct{}
rowsPushedAfterRelabel *metrics.Counter
rowsDroppedByRelabel *metrics.Counter
@@ -937,6 +942,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
rowsDroppedOnPushFailure: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_samples_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
}
rwctx.initStreamAggrConfig()
rwctx.initObfuscationConfig()
return rwctx
}
@@ -1120,6 +1126,15 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
rctx.appendExtraLabels(tss, labelsGlobal)
}
if len(rwctx.obfuscationLabels) != 0 {
if rctx == nil {
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = rwctx.applyObfuscation(tss)
}
pss := rwctx.pss
idx := rwctx.pssNextIdx.Add(1) % uint64(len(pss))

View File

@@ -374,3 +374,92 @@ func TestCalculateHealthyRwctxIdx(t *testing.T) {
f(1, []int{0}, nil)
f(1, []int{}, []int{0})
}
// TestRemoteWriteContext_Obfuscation verifies that initObfuscationConfig parses
// the "^^"-delimited list of label names and that applyObfuscation replaces
// the values of these labels with their hex-encoded SHA-256 hashes,
// while leaving the remaining labels and samples untouched.
func TestRemoteWriteContext_Obfuscation(t *testing.T) {
	f := func(obfuscationLabelList string, obfuscationLabelCount int, inputTss []prompb.TimeSeries, expectedTss []prompb.TimeSeries) {
		t.Helper()

		rwctx := &remoteWriteCtx{
			idx:                 0,
			streamAggrKeepInput: false,
			streamAggrDropInput: true,
		}
		defer metrics.UnregisterAllMetrics()

		// The flag value is package-level state. Restore it when the case is done,
		// so the override doesn't leak into other tests in the package.
		prevObfuscationLabels := *obfuscationLabels
		defer func() {
			*obfuscationLabels = prevObfuscationLabels
		}()
		*obfuscationLabels = []string{obfuscationLabelList}

		rwctx.initObfuscationConfig()
		if len(rwctx.obfuscationLabels) != obfuscationLabelCount {
			t.Fatalf("unexpected obfuscation labels len; got %d; want %d", len(rwctx.obfuscationLabels), obfuscationLabelCount)
		}

		outputTss := rwctx.applyObfuscation(inputTss)
		if !reflect.DeepEqual(expectedTss, outputTss) {
			t.Fatalf("unexpected time series after obfuscation;\ngot\n%v\nwant\n%v", outputTss, expectedTss)
		}
	}

	// a single obfuscated label; the other label must stay as is
	f("ip", 1,
		[]prompb.TimeSeries{
			{
				Labels: []prompb.Label{
					{Name: "ip", Value: "123"},
					{Name: "instance", Value: "1234"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
		},
		[]prompb.TimeSeries{
			{
				Labels: []prompb.Label{
					{Name: "ip", Value: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"},
					{Name: "instance", Value: "1234"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
		},
	)

	// multiple obfuscated labels; a series without these labels must stay as is,
	// even if it has a label with the same value as an obfuscated one
	f("ip^^instance", 2,
		[]prompb.TimeSeries{
			{
				Labels: []prompb.Label{
					{Name: "ip", Value: "123"},
					{Name: "instance", Value: "1234"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
			{
				Labels: []prompb.Label{
					{Name: "job", Value: "123"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
		},
		[]prompb.TimeSeries{
			{
				Labels: []prompb.Label{
					{Name: "ip", Value: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"},
					{Name: "instance", Value: "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
			{
				Labels: []prompb.Label{
					{Name: "job", Value: "123"},
				},
				Samples: []prompb.Sample{
					{Value: 1, Timestamp: 0},
				},
			},
		},
	)
}

View File

@@ -25,9 +25,9 @@ The sandbox cluster installation runs under the constant load generated by
See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
## tip
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add label value obfuscation for remote write. When `-remoteWrite.obfuscationLabels` is set, the values of the listed labels are anonymized before they are sent to the corresponding `-remoteWrite.url`. See [#10599](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10599).
## [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0)
Released at 2026-04-10
**Update Note 1:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): [CSV export](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-csv-data) (`/api/v1/export/csv`) now adds a header row as the first line of the response, so existing CSV-processing scripts may need to skip this header. See [#10666](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10666).

View File

@@ -289,14 +289,16 @@ flowchart TB
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#obfuscating-label-values">obfuscate labels</a><br><b>-remoteWrite.obfuscationLabels</b>]
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
%% Right branch
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#obfuscating-label-values">obfuscate labels</a><br><b>-remoteWrite.obfuscationLabels</b>]
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
```
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
@@ -529,6 +531,22 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
/path/to/vmagent -remoteWrite.url=http://127.0.0.1:8428/api/v1/write?extra_label="env=prod"
```
## Obfuscating label values
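`vmagent` can anonymize the values of specific labels before sending metrics to `-remoteWrite.url`. Every value of a listed label is replaced with the hex-encoded SHA-256 hash of the original value, so identical values are always mapped to the same obfuscated value.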
Some `-remoteWrite.url` destinations may point to external services, such as a monitoring vendor outside the company. To meet security and compliance requirements,
it is useful to obfuscate specific labels (e.g. IP address, datacenter, etc.) before sending them to these external services.
Use `-remoteWrite.obfuscationLabels` to specify the labels that need to be obfuscated before sending to `-remoteWrite.url`. Multiple labels should be separated by `^^`:
```sh
./vmagent \
-remoteWrite.url=http://<external-service> \
-remoteWrite.obfuscationLabels='instance^^datacenter' \
-remoteWrite.url=http://<internal-url> \
-remoteWrite.obfuscationLabels='job'
```
## Automatically generated metrics
`vmagent` automatically generates the following metrics per each scrape of every [Prometheus-compatible target](#how-to-collect-metrics-in-prometheus-format)