From e6d764e7f68d470cb37b9c0f46920dab27eaaf66 Mon Sep 17 00:00:00 2001 From: Nikolay Date: Fri, 24 Apr 2026 13:36:35 +0200 Subject: [PATCH] app/vmagent: properly attach tenant information to metadata (#10865) Previously, vmagent ignored tenant ID information obtained from `__tenant_id__` label for metrics metadata. It made it impossible to route metrics metadata to the `/multitenant` endpoints. This commit adds tenant ID to the metrics metadata. It also fixes VMagent multitenant ingestion endpoints. Previously, the tenant info defined there was not properly set to metadata. Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828 PR https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10865 --------- Signed-off-by: Nikolay Signed-off-by: f41gh7 Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> Co-authored-by: Max Kotliar --- app/vmagent/opentelemetry/request_handler.go | 10 --- .../prometheusimport/request_handler.go | 7 -- .../promremotewrite/request_handler.go | 9 +-- app/vmagent/remotewrite/pendingseries.go | 3 + app/vmagent/remotewrite/remotewrite.go | 11 ++- apptest/tests/vmagent_remotewrite_test.go | 72 +++++++++++++++++++ apptest/vmagent.go | 39 +++++++++- docs/victoriametrics/changelog/CHANGELOG.md | 2 + docs/victoriametrics/vmagent.md | 23 +++++- 9 files changed, 146 insertions(+), 30 deletions(-) diff --git a/app/vmagent/opentelemetry/request_handler.go b/app/vmagent/opentelemetry/request_handler.go index ea40f9e319..b0be66c84f 100644 --- a/app/vmagent/opentelemetry/request_handler.go +++ b/app/vmagent/opentelemetry/request_handler.go @@ -77,16 +77,6 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta var metadataTotal int if prommetadata.IsEnabled() { - var accountID, projectID uint32 - if at != nil { - accountID = at.AccountID - projectID = at.ProjectID - for i := range mms { - mm := &mms[i] - mm.AccountID = accountID - mm.ProjectID = projectID - } - } ctx.WriteRequest.Metadata = mms metadataTotal = len(mms) } diff --git a/app/vmagent/prometheusimport/request_handler.go b/app/vmagent/prometheusimport/request_handler.go index 39eb2c6aee..f618d225d1 100644 --- a/app/vmagent/prometheusimport/request_handler.go +++ b/app/vmagent/prometheusimport/request_handler.go @@ -75,11 +75,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata Samples: samples[len(samples)-1:], }) } - var accountID, projectID uint32 - if at != nil { - accountID = at.AccountID - projectID = at.ProjectID - } for i := range mms { mm := &mms[i] mmsDst = append(mmsDst, prompb.MetricMetadata{ @@ -88,8 +83,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata Type: mm.Type, // there is no unit in Prometheus exposition formats - AccountID: accountID, - ProjectID: projectID, }) } ctx.WriteRequest.Timeseries = tssDst diff --git a/app/vmagent/promremotewrite/request_handler.go b/app/vmagent/promremotewrite/request_handler.go index d0ffd0f054..c2a6e320e1 100644 --- a/app/vmagent/promremotewrite/request_handler.go +++ b/app/vmagent/promremotewrite/request_handler.go @@ -72,11 +72,6 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met var metadataTotal int if prommetadata.IsEnabled() { - var accountID, projectID uint32 - if at != nil { - accountID = at.AccountID - projectID = at.ProjectID - } for i := range mms { mm := &mms[i] mmsDst = append(mmsDst, prompb.MetricMetadata{ @@ -85,8 +80,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met Type: mm.Type, Unit: mm.Unit, - AccountID: accountID, - ProjectID: projectID, + AccountID: mm.AccountID, + ProjectID: mm.ProjectID, }) } ctx.WriteRequest.Metadata = mmsDst diff --git a/app/vmagent/remotewrite/pendingseries.go b/app/vmagent/remotewrite/pendingseries.go index e64b217bb1..1e1ee11acc 100644 --- a/app/vmagent/remotewrite/pendingseries.go +++ b/app/vmagent/remotewrite/pendingseries.go @@ -211,6 +211,9 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) { dst.Type = src.Type dst.Unit = src.Unit + dst.AccountID = src.AccountID + dst.ProjectID = src.ProjectID + // Pre-allocate memory for all string fields. neededBufLen := len(src.MetricFamilyName) + len(src.Help) bufLen := len(wr.metadatabuf) diff --git a/app/vmagent/remotewrite/remotewrite.go b/app/vmagent/remotewrite/remotewrite.go index e1dcf10b42..51f3d515e7 100644 --- a/app/vmagent/remotewrite/remotewrite.go +++ b/app/vmagent/remotewrite/remotewrite.go @@ -398,7 +398,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure // Push metadata separately from time series, since it doesn't need sharding, // relabeling, stream aggregation, deduplication, etc. - if !tryPushMetadataToRemoteStorages(rwctxs, mms, forceDropSamplesOnFailure) { + if !tryPushMetadataToRemoteStorages(at, rwctxs, mms, forceDropSamplesOnFailure) { return false } @@ -536,11 +536,18 @@ func pushTimeSeriesToRemoteStoragesTrackDropped(tss []prompb.TimeSeries) { } } -func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool { +func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool { if len(mms) == 0 { // Nothing to push return true } + if at != nil { + for idx := range mms { + mm := &mms[idx] + mm.AccountID = at.AccountID + mm.ProjectID = at.ProjectID + } + } // Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs. // Since metadata is usually small and there is no guarantee that metadata can be sent to // the same remote storage with the corresponding metrics. diff --git a/apptest/tests/vmagent_remotewrite_test.go b/apptest/tests/vmagent_remotewrite_test.go index fcdbe417b5..85ed2342f5 100644 --- a/apptest/tests/vmagent_remotewrite_test.go +++ b/apptest/tests/vmagent_remotewrite_test.go @@ -13,6 +13,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/apptest" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" ) // TestSingleVMAgentReloadConfigs verifies that vmagent reload new configurations on SIGHUP signal @@ -512,3 +513,74 @@ func TestSingleVMAgentCardinalityLimiter(t *testing.T) { t.Fatalf("unexpected vmagent_daily_series_limit_rows_dropped_total value: %d", v) } } + +func TestClusterVMAgentForwardMetricsMetadata(t *testing.T) { + tc := apptest.NewTestCase(t) + defer tc.Stop() + + sut := tc.MustStartDefaultCluster() + + vmagent := tc.MustStartVmagent("vmagent", []string{ + `-remoteWrite.flushInterval=50ms`, + `-remoteWrite.forcePromProto=true`, + `-enableMultitenantHandlers=true`, + "-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent", + fmt.Sprintf(`-remoteWrite.url=http://%s/insert/multitenant/prometheus/api/v1/write`, sut.Vminsert.HTTPAddr()), + }, ``) + + prometheusRemoteWriteDataSet := prompb.WriteRequest{ + Metadata: []prompb.MetricMetadata{ + {MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary, AccountID: 100}, + }, + } + vmagent.PrometheusAPIV1Write(t, prometheusRemoteWriteDataSet, apptest.QueryOpts{Tenant: "multitenant"}) + + tc.Assert(&apptest.AssertOptions{ + Msg: "unexpected /api/v1/metadata response", + Got: func() any { + return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "100:0"}) + }, + Want: &apptest.PrometheusAPIV1Metadata{ + Status: "success", + Data: map[string][]apptest.MetadataEntry{ + "metric_name_4": {{Help: "some help message", Type: "summary"}}, + }, + }, + }) + + prometheusRemoteWriteDataSet = prompb.WriteRequest{ + Metadata: []prompb.MetricMetadata{ + {MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeSummary, AccountID: 100}, + }, + } + // enforce tenant from request uri /insert/tenant_id/prometheus/api/v1/write + vmagent.PrometheusAPIV1Write(t, prometheusRemoteWriteDataSet, apptest.QueryOpts{Tenant: "500:500"}) + + tc.Assert(&apptest.AssertOptions{ + Msg: "unexpected /api/v1/metadata response", + Got: func() any { + return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "500:500"}) + }, + Want: &apptest.PrometheusAPIV1Metadata{ + Status: "success", + Data: map[string][]apptest.MetadataEntry{ + "metric_name_6": {{Help: "some help message", Type: "summary"}}, + }, + }, + }) + + tc.Assert(&apptest.AssertOptions{ + Msg: "unexpected /api/v1/metadata response", + Got: func() any { + return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "multitenant"}) + }, + Want: &apptest.PrometheusAPIV1Metadata{ + Status: "success", + Data: map[string][]apptest.MetadataEntry{ + "metric_name_4": {{Help: "some help message", Type: "summary"}}, + "metric_name_6": {{Help: "some help message", Type: "summary"}}, + }, + }, + }) + +} diff --git a/apptest/vmagent.go b/apptest/vmagent.go index c3e2607465..db15042917 100644 --- a/apptest/vmagent.go +++ b/apptest/vmagent.go @@ -10,6 +10,10 @@ import ( "syscall" "testing" "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" + "github.com/golang/snappy" ) // Vmagent holds the state of a vmagent app and provides vmagent-specific functions @@ -158,6 +162,31 @@ func (app *Vmagent) ReloadRelabelConfigs(t *testing.T) { t.Fatalf("relabel configs were not reloaded after SIGHUP signal; previous total: %f, current total: %f", prevTotal, currTotal) } +// PrometheusAPIV1Write is a test helper function that inserts a +// collection of records in Prometheus remote-write format by sending a HTTP +// POST request to /prometheus/api/v1/write vmagent endpoint. +func (app *Vmagent) PrometheusAPIV1Write(t *testing.T, wr prompb.WriteRequest, opts QueryOpts) { + t.Helper() + + url := fmt.Sprintf("http://%s/prometheus/api/v1/write", app.httpListenAddr) + if opts.Tenant != "" { + url = fmt.Sprintf("http://%s/insert/%s/prometheus/api/v1/write", app.httpListenAddr, opts.Tenant) + } + data := snappy.Encode(nil, wr.MarshalProtobuf(nil)) + recordsCount := len(wr.Timeseries) + if prommetadata.IsEnabled() { + recordsCount += len(wr.Metadata) + } + headers := opts.getHeaders() + headers.Set("Content-Type", "application/x-protobuf") + app.sendBlocking(t, recordsCount, func() { + _, statusCode := app.cli.Post(t, url, data, headers) + if statusCode != http.StatusNoContent { + t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusNoContent) + } + }) +} + // HTTPAddr returns the address at which the vmagent process is listening // for http connections. func (app *Vmagent) HTTPAddr() string { @@ -176,16 +205,22 @@ func (app *Vmagent) HTTPAddr() string { // If it is, then the data has been sent to vmstorage. // // Unreliable if the records are inserted concurrently. -func (app *Vmagent) sendBlocking(t *testing.T, numRecordsToSend int, send func()) { +func (app *Vmagent) sendBlocking(t *testing.T, _ int, send func()) { t.Helper() + currRowsSentCount := app.remoteWriteRequestsTotal(t) + send() const ( retries = 20 period = 100 * time.Millisecond ) - wantRowsSentCount := app.remoteWriteRequestsTotal(t) + numRecordsToSend + // TODO: properly account wantRowsSentCount + // currently vmagent doesn't expose per time-series write information + // so we can only account number of blocks sent via remote write protocol + // it should be suitable for tests purpose + wantRowsSentCount := currRowsSentCount + 1 for range retries { if app.remoteWriteRequestsTotal(t) >= wantRowsSentCount { return diff --git a/docs/victoriametrics/changelog/CHANGELOG.md b/docs/victoriametrics/changelog/CHANGELOG.md index 58760f3163..e7f5ac570e 100644 --- a/docs/victoriametrics/changelog/CHANGELOG.md +++ b/docs/victoriametrics/changelog/CHANGELOG.md @@ -36,6 +36,8 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel * BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix increased memory usage after upgrade to [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0) by properly accounting for internal buffer count when calculating per-storage buffer size. See [#10725](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10725#issuecomment-4282256709). * BUGFIX: all VictoriaMetrics components: properly parse IPv6 source address when accepting connections with proxy protocol v2 enabled. See [#10839](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10839). Thanks to @andriibeee for the contribution. +* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): properly attach [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) from `__tenant_id__` label to the scraped metadata. See [#10828](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828). +* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): keep [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) ingested into vmagent via [prometheus remotewrite](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/) endpoint. See [#10828](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828). * BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): `-maxScrapeSize` is now correctly applied when reading response bodies, including non-OK scrape error responses. See [#10804](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10804). * BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix `ec2_sd_configs` returning 401 `AuthFailure` from AWS when credentials are obtained via IRSA, instance role or `AWS_CONTAINER_CREDENTIALS_*` env vars. The regression was introduced in [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0). See [#10815](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10815). * BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): fix leak of backend TCP connections, file descriptors and goroutines when the client cancels the request after the backend response has been received. See [#10833](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10833). Thanks to @andriibeee for the contribution. diff --git a/docs/victoriametrics/vmagent.md b/docs/victoriametrics/vmagent.md index 5c31bb4d80..29599bf968 100644 --- a/docs/victoriametrics/vmagent.md +++ b/docs/victoriametrics/vmagent.md @@ -501,9 +501,24 @@ scrape_configs: target_label: vm_account_id ``` +In addition, vmagent could obtain tenant identifier from `__tenant_id__` label at target discovery phase. +It implicitly converts `__tenant_id__` label into `vm_account_id` and `vm_project_id` labels and attaches +it to the scraped metrics and metrics metadata. +For example, the following relabeling rule instructs sending metrics to `10:5` [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) +defined in the `prometheus.io/tenant_id: 10:5` annotation of Kubernetes pod deployment: + +```yaml +scrape_configs: +- kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_tenant_id] + target_label: __tenant_id__ +``` + `vmagent` can accept data via the same multitenant endpoints (`/insert//`) as `vminsert` at [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) does according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) if `-enableMultitenantHandlers` command-line flag is set. -In this case, vmagent automatically converts tenant identifiers from the URL to `vm_account_id` and `vm_project_id` labels. +In this case, vmagent automatically converts tenant identifiers from the URL to `vm_account_id` and `vm_project_id` labels and sets tenant info in metadata. These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) specified via `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig` command-line flags. Metrics with `vm_account_id` and `vm_project_id` labels can be routed to the corresponding tenants when specifying `-remoteWrite.url` to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels). @@ -665,10 +680,14 @@ e.g. it sets `scrape_series_added` metric to zero. See [these docs](#automatical `vmagent` accepts{{% available_from "v1.137.0" %}} metric metadata exposed by scrape targets in [Prometheus exposition format](https://github.com/prometheus/docs/blob/main/docs/instrumenting/exposition_formats.md), received via [Prometheus remote write v1](https://prometheus.io/docs/specs/prw/remote_write_spec/) or [OpenTelemetry protocol](https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto) by default. Set `-enableMetadata=false` to disable metadata processing{{% available_from "v1.125.1" %}}. During processing, metadata won't be dropped or modified by [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) or [streaming aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/). -When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata received via the [multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (`/insert//`). However, if `vm_account_id` or `vm_project_id` labels are added directly to metrics before reaching vmagent, and vmagent writes to the [vminsert multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels), the tenant info won't be attached and the metadata will be stored under the default tenant of VictoriaMetrics cluster. +When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata specified in [multitenant endpoint](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (`/insert//`). +However, if the `/insert/multitenant/` endpoint is used, vmagent preserves the tenant information provided in the metadata by the sender. If the sender does not specify tenant information, the default tenant `0:0` is used. > Metadata requires extra memory, disk space, and network traffic. +Use `-remoteWrite.disableMetadata`{{% available_from "v1.140.0" %}} to fully disable sending metadata from vmagent. +This reduces network traffic and resource usage when metadata is not required. + ## Stream parsing mode By default, `vmagent` parses the full response from the scrape target, applies [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/)