mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-09 11:54:31 +03:00
Compare commits
22 Commits
gh-2388
...
issue-1060
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef0ae0fb9a | ||
|
|
4cbedc6b1b | ||
|
|
4f5cd15163 | ||
|
|
5b161ce283 | ||
|
|
3c192f9238 | ||
|
|
159bc15825 | ||
|
|
8db58ac410 | ||
|
|
42c1f729db | ||
|
|
6851a75c71 | ||
|
|
43773e1d5c | ||
|
|
517c17b744 | ||
|
|
1c2622d8ae | ||
|
|
9a836dac59 | ||
|
|
799ecb0a08 | ||
|
|
c88dc19052 | ||
|
|
919049f9e2 | ||
|
|
24efe47c6a | ||
|
|
f8d99d9289 | ||
|
|
333a015be5 | ||
|
|
b6196524ba | ||
|
|
e9f1bb911c | ||
|
|
12b79143dc |
@@ -12,6 +12,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mdx"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -103,6 +104,9 @@ var (
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
|
||||
enableMdx = flagutil.NewArrayBool("remoteWrite.mdx.enable", "Whether to only retain metrics from VictoriaMetrics services before sending them to the corresponding -remoteWrite.url. "+
|
||||
"Please see https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -304,6 +308,10 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
}
|
||||
fs.RegisterPathFsMetrics(*tmpDataPath)
|
||||
|
||||
if slices.Contains(*enableMdx, true) && *shardByURL {
|
||||
logger.Fatalf("-remoteWrite.mdx.enable and -remoteWrite.shardByURL cannot be set to true simultaneously.")
|
||||
}
|
||||
|
||||
if *shardByURL {
|
||||
consistentHashNodes := make([]string, 0, len(urls))
|
||||
for i, url := range urls {
|
||||
@@ -859,6 +867,7 @@ type remoteWriteCtx struct {
|
||||
|
||||
sas atomic.Pointer[streamaggr.Aggregators]
|
||||
deduplicator *streamaggr.Deduplicator
|
||||
mdxFilter *mdx.Filter
|
||||
|
||||
streamAggrKeepInput bool
|
||||
streamAggrDropInput bool
|
||||
@@ -873,6 +882,7 @@ type remoteWriteCtx struct {
|
||||
|
||||
rowsPushedAfterRelabel *metrics.Counter
|
||||
rowsDroppedByRelabel *metrics.Counter
|
||||
rowsPreservedByMdx *metrics.Counter
|
||||
|
||||
pushFailures *metrics.Counter
|
||||
metadataDroppedOnPushFailure *metrics.Counter
|
||||
@@ -959,7 +969,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq, &c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
@@ -976,6 +985,15 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
rwctx.initStreamAggrConfig()
|
||||
|
||||
if enableMdx.GetOptionalArg(argIdx) {
|
||||
rwctx.mdxFilter = mdx.NewFilter()
|
||||
rwctx.rowsPreservedByMdx = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_mdx_rows_preserved_total{path=%q,url=%q}`, queuePath, sanitizedURL))
|
||||
_ = metrics.NewGauge(fmt.Sprintf(`vmagent_mdx_tracked_vm_instances{path=%q,url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(rwctx.mdxFilter.VmInstancesCount())
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
@@ -989,6 +1007,10 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.deduplicator.MustStop()
|
||||
rwctx.deduplicator = nil
|
||||
}
|
||||
if rwctx.mdxFilter != nil {
|
||||
rwctx.mdxFilter.MustStop()
|
||||
rwctx.mdxFilter = nil
|
||||
}
|
||||
|
||||
for _, ps := range rwctx.pss {
|
||||
ps.MustStop()
|
||||
@@ -1004,6 +1026,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
rwctx.rowsPushedAfterRelabel = nil
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
rwctx.rowsPreservedByMdx = nil
|
||||
}
|
||||
|
||||
// TryPushTimeSeries sends tss series to the configured remote write endpoint
|
||||
@@ -1012,24 +1035,38 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) bool {
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompb.TimeSeries
|
||||
acquiredTssFromPool := false
|
||||
defer func() {
|
||||
if rctx == nil {
|
||||
return
|
||||
if acquiredTssFromPool {
|
||||
*v = prompb.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
}
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
*v = prompb.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}()
|
||||
|
||||
if rwctx.mdxFilter != nil {
|
||||
acquiredTssFromPool = true
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = rwctx.mdxFilter.Filter(tss, *v)
|
||||
rowsCountAfterMdx := getRowsCount(tss)
|
||||
rwctx.rowsPreservedByMdx.Add(rowsCountAfterMdx)
|
||||
if len(tss) == 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Apply relabeling
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcs := rcs.perURL[rwctx.idx]
|
||||
if pcs.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
rctx = getRelabelCtx()
|
||||
acquiredTssFromPool = true
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
rowsCountBeforeRelabel := getRowsCount(tss)
|
||||
@@ -1049,6 +1086,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
acquiredTssFromPool = true
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
@@ -1058,6 +1096,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
acquiredTssFromPool = true
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
|
||||
@@ -130,7 +130,7 @@
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"fields": "/^version$/",
|
||||
"values": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
@@ -146,11 +146,10 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -791,7 +791,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -789,7 +789,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -131,7 +131,7 @@
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"fields": "/^version$/",
|
||||
"values": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
@@ -147,11 +147,10 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -792,7 +792,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -790,7 +790,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -785,7 +785,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -566,7 +566,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -492,14 +492,14 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by (job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"title": "",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -784,7 +784,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -565,7 +565,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
|
||||
@@ -491,14 +491,14 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by (job, short_version)",
|
||||
"expr": "sum by(job, version) (label_replace(vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version!=\"\"}, \"version\", \"$1\", \"short_version\", \"(.*)\") or vm_app_version{job=~\"$job\", instance=~\"$instance\", short_version=\"\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"title": "",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -25,6 +25,7 @@ The sandbox cluster installation runs under the constant load generated by
|
||||
See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
|
||||
|
||||
## tip
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): introduce `-remoteWrite.mdx.enable` and `-mdx.instanceEntryTTL` command-line flags to support mdx service in `vmagent`, it allows `vmagent` to send only metrics from VictoriaMetrics services to the corresponding `-remoteWrite.url`. See [monitoring-data-exchange](https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange) and [#10600](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10600) for detail.
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.3 to Go1.26.4. See [the list of issues addressed in Go1.26.4](https://github.com/golang/go/issues?q=milestone%3AGo1.26.4%20label%3ACherryPickApproved).
|
||||
|
||||
@@ -32,16 +33,18 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/), [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `-opentelemetry.promoteAllResourceAttributes` and `-opentelemetry.promoteScopeMetadata` command-line flags to allow managing label promotion for resource attributes and OTel scope metadata. See [OpenTelemetry](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/) docs and [#10931](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10931).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) : introduce `vmagent_remotewrite_kafka_outbuf_latency_seconds` and `vmagent_remotewrite_kafka_rtt_seconds` metrics for [kafka integration](https://docs.victoriametrics.com/victoriametrics/integrations/kafka/). The metrics could help identify throughput bottlenecks. See [#10730](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10730).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly log user information when a missing route error occurs. See [#11052](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11052).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl/): add the ability to migrate data from Mimir object storage to VictoriaMetrics. See [#7717](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7717).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl/): add the ability to migrate data from [Mimir](https://docs.victoriametrics.com/victoriametrics/vmctl/mimir/#) object storage to VictoriaMetrics. See [#7717](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7717).
|
||||
* FEATURE: [dashboards](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/dashboards): show the full `version` label in the `Version` panel when `short_version` label is empty (e.g. custom builds from feature branch). Previously, the panel could appear empty. See [#11047](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11047).
|
||||
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): fix the `Notifiers` page in web UI appearing blank despite the API returning notifier data correctly. See [#11035](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11035).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): reset the group evaluation timestamp if it exceeds the current host time. Previously, vmalert could use future timestamps for evaluations if the system clock was shifted backward. See [#10985](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10985).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): properly parse [Prometheus Native Histograms](https://prometheus.io/docs/specs/native_histograms/), previously Protobuf parser could produce unexpected `vmrange` labels. See [#11041](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11041).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly calculate number of loaded users to be printed in startup log. Previously, it was only accounting for static users and skipped JWT configuration entries.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly calculate number of loaded users to be printed in startup log. Previously, it was only accounting for static users and skipped JWT configuration entries. See [#11050](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11050/).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): `integrate()` no longer extrapolates the last sample's value past the end of the time series. Previously, querying `integrate(metric[1h])` at a timestamp where the series had already ended would keep accruing area as if the last value continued indefinitely, producing values much larger than the true integral. See [#9474](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9474). Thanks to @wtfashwin for contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): avoid returning HTTP 503 for queries with partial results when a storage group is unavailable and `-search.denyPartialResponse` is disabled.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): avoid returning HTTP 503 for queries with partial results when a storage group is unavailable and `-search.denyPartialResponse` is disabled. See [#11009](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11009). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `utf-8` label names for [/federate](https://docs.victoriametrics.com/victoriametrics/#federation) API requests. See [#10968](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10968).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): persist the `Disable deduplication` toggle under its own local storage key. Before this fix, the toggle state was lost after reload and could overwrite the `Compact view` table setting. See [#11004](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11004). Thanks to @immanuwell for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix intermittent `write: connection timed out` errors caused by silently dropped TCP connections being reused from the connection pool. See [#10735](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301).
|
||||
|
||||
## [v1.144.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.144.0)
|
||||
|
||||
|
||||
@@ -268,6 +268,31 @@ for the collected samples. Examples:
|
||||
```sh
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
### Monitoring Data eXchange
|
||||
|
||||
The MDX (Monitoring Data eXchange) feature aims to send only metrics from the VictoriaMetrics services to the corresponding `-remoteWrite.url`, discarding metrics from non-VictoriaMetrics services.
|
||||
|
||||
To enable MDX, set `-remoteWrite.mdx.enable=true` for the target URL and `-remoteWrite.mdx.enable=false` for other URLs:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-all-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=false \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true
|
||||
```
|
||||
When enabling MDX for the `-remoteWrite.url`, `vmagent` will only forward the metrics from the instances that emit `vm_app_version`, which is a metric that all VictoriaMetrics services will emit,
|
||||
or the metrics contain the label specified by `mdx.label`:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true \
|
||||
-mdx.label="service=victoriametrics"
|
||||
```
|
||||
|
||||
The number of preserved rows from non-VictoriaMetrics services is exposed as `vmagent_remotewrite_mdx_rows_preserved_total`.
|
||||
|
||||
### Life of a sample
|
||||
|
||||
@@ -285,18 +310,20 @@ flowchart TB
|
||||
F --> G[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability">replicate</a> to each <b>-remoteWrite.url</b><br/>or <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages">shard</a> if <b>-remoteWrite.shardByURL</b> is set]
|
||||
|
||||
%% Left branch
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange/">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H2 --> H3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H3 --> H4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
|
||||
|
||||
%% Right branch
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R2 --> R3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R3 --> R4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
|
||||
```
|
||||
|
||||
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
|
||||
|
||||
159
lib/mdx/filter.go
Normal file
159
lib/mdx/filter.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
var (
|
||||
vmLabel = flag.String("mdx.label", "", "Optional label in the form 'name=value' to identify metrics from VictoriaMetrics. The metrics contain this label will be kept and sent to the `-remoteWrite.url` that configured with `-remoteWrite.mdx.enable=true`.")
|
||||
)
|
||||
|
||||
// Filter manages the list of VictoriaMetrics instances discovered from previous data flow, and uses it to filter out metrics that are not from VictoriaMetrics instances.
|
||||
type Filter struct {
|
||||
mu sync.RWMutex
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
vmInstance map[string]*atomic.Int64
|
||||
filterByLabel bool
|
||||
filterByCustomLabelName string
|
||||
filterByCustomLabelValue string
|
||||
}
|
||||
|
||||
func NewFilter() *Filter {
|
||||
filter := &Filter{
|
||||
vmInstance: make(map[string]*atomic.Int64),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
if len(*vmLabel) != 0 {
|
||||
n := strings.IndexByte(*vmLabel, '=')
|
||||
if n < 0 {
|
||||
logger.Fatalf("missing '=' in `-mdx.label`. It must contain label in the form `name=value`; got %q", *vmLabel)
|
||||
}
|
||||
filter.filterByCustomLabelName = (*vmLabel)[:n]
|
||||
filter.filterByCustomLabelValue = (*vmLabel)[n+1:]
|
||||
filter.filterByLabel = true
|
||||
}
|
||||
|
||||
filter.wg.Go(filter.cleanStale)
|
||||
return filter
|
||||
}
|
||||
|
||||
func (filter *Filter) VmInstancesCount() int {
|
||||
filter.mu.RLock()
|
||||
defer filter.mu.RUnlock()
|
||||
return len(filter.vmInstance)
|
||||
|
||||
}
|
||||
|
||||
func (filter *Filter) cleanStale() {
|
||||
entryTTL := time.Hour * 1
|
||||
ttlSec := int64(entryTTL.Seconds())
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
filter.mu.Lock()
|
||||
currTs := time.Now().Unix()
|
||||
|
||||
dst := make(map[string]*atomic.Int64, len(filter.vmInstance))
|
||||
for k, v := range filter.vmInstance {
|
||||
if currTs-v.Load() < ttlSec {
|
||||
dst[k] = v
|
||||
}
|
||||
}
|
||||
if len(dst) != len(filter.vmInstance) {
|
||||
filter.vmInstance = dst
|
||||
}
|
||||
filter.mu.Unlock()
|
||||
case <-filter.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (filter *Filter) MustStop() {
|
||||
if filter == nil {
|
||||
return
|
||||
}
|
||||
close(filter.stopCh)
|
||||
filter.wg.Wait()
|
||||
}
|
||||
|
||||
func (filter *Filter) Filter(tss []prompb.TimeSeries, resTss []prompb.TimeSeries) []prompb.TimeSeries {
|
||||
currTs := time.Now().Unix()
|
||||
var identicalKey []byte
|
||||
|
||||
nextTss:
|
||||
for _, ts := range tss {
|
||||
var hasVersionLabel, triedJobInstance bool
|
||||
var job, instance string
|
||||
for _, label := range ts.Labels {
|
||||
if filter.filterByLabel && label.Name == filter.filterByCustomLabelName && label.Value == filter.filterByCustomLabelValue {
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
|
||||
if label.Name == "__name__" && label.Value == "vm_app_version" {
|
||||
hasVersionLabel = true
|
||||
}
|
||||
if instance == "" && label.Name == "instance" {
|
||||
if label.Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
instance = label.Value
|
||||
}
|
||||
if job == "" && label.Name == "job" {
|
||||
if label.Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
job = label.Value
|
||||
}
|
||||
if !triedJobInstance && job != "" && instance != "" {
|
||||
identicalKey = identicalKey[:0]
|
||||
identicalKey = strconv.AppendQuote(identicalKey, job)
|
||||
identicalKey = append(identicalKey, ':')
|
||||
identicalKey = strconv.AppendQuote(identicalKey, instance)
|
||||
filter.mu.RLock()
|
||||
ptr, found := filter.vmInstance[bytesutil.ToUnsafeString(identicalKey)]
|
||||
filter.mu.RUnlock()
|
||||
if found {
|
||||
ptr.Store(currTs)
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
triedJobInstance = true
|
||||
}
|
||||
|
||||
if hasVersionLabel && job != "" && instance != "" {
|
||||
identicalKey = identicalKey[:0]
|
||||
identicalKey = strconv.AppendQuote(identicalKey, job)
|
||||
identicalKey = append(identicalKey, ':')
|
||||
identicalKey = strconv.AppendQuote(identicalKey, instance)
|
||||
|
||||
v := &atomic.Int64{}
|
||||
v.Store(currTs)
|
||||
|
||||
filter.mu.Lock()
|
||||
filter.vmInstance[string(identicalKey)] = v
|
||||
filter.mu.Unlock()
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
}
|
||||
}
|
||||
return resTss
|
||||
}
|
||||
216
lib/mdx/filter_test.go
Normal file
216
lib/mdx/filter_test.go
Normal file
@@ -0,0 +1,216 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
func timeSeriessToString(tss []prompb.TimeSeries) string {
|
||||
a := make([]string, len(tss))
|
||||
for i, ts := range tss {
|
||||
a[i] = timeSeriesToString(ts)
|
||||
}
|
||||
sort.Strings(a)
|
||||
return strings.Join(a, "")
|
||||
}
|
||||
|
||||
func timeSeriesToString(ts prompb.TimeSeries) string {
|
||||
labelsString := promrelabel.LabelsToString(ts.Labels)
|
||||
|
||||
return fmt.Sprintf("%s\n", labelsString)
|
||||
}
|
||||
|
||||
func TestMdxInstanceFilter(t *testing.T) {
|
||||
|
||||
filter := NewFilter()
|
||||
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries, expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
output := filter.Filter(input, []prompb.TimeSeries{})
|
||||
if len(output) != len(expectedOutput) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(output), len(expectedOutput))
|
||||
}
|
||||
if timeSeriessToString(output) != timeSeriessToString(expectedOutput) {
|
||||
t.Fatalf("unexpected output; got %s; want %s", timeSeriessToString(output), timeSeriessToString(expectedOutput))
|
||||
}
|
||||
if len(filter.vmInstance) != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(filter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if filter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
f([]prompb.TimeSeries{{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
}, map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
fmt.Sprintf("%q:%q", "test", "vmagent1:8429"): 0,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestMdxFilterByLabel(t *testing.T) {
|
||||
*vmLabel = "service=victoriametrics"
|
||||
filter := NewFilter()
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries) {
|
||||
t.Helper()
|
||||
output := filter.Filter(input, []prompb.TimeSeries{})
|
||||
if len(output) != len(expectedOutput) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(output), len(expectedOutput))
|
||||
}
|
||||
if timeSeriessToString(output) != timeSeriessToString(expectedOutput) {
|
||||
t.Fatalf("unexpected output; got %s; want %s", timeSeriessToString(output), timeSeriessToString(expectedOutput))
|
||||
}
|
||||
}
|
||||
f([]prompb.TimeSeries{{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
})
|
||||
*vmLabel = ""
|
||||
}
|
||||
|
||||
func TestMdxInstanceCleanup(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
filter := NewFilter()
|
||||
|
||||
// init instance list
|
||||
filter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{},
|
||||
)
|
||||
f := func(expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
if len(filter.vmInstance) != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(filter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if filter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
f(map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
fmt.Sprintf("%q:%q", "test", "vmagent1:8429"): 0,
|
||||
})
|
||||
|
||||
// receive samples from victoria-metrics1:8428 after 9 seconds.
|
||||
time.Sleep(59 * time.Minute)
|
||||
filter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{},
|
||||
)
|
||||
|
||||
// no samples from vmagent1:8429 in the last 10 seconds, so it should be removed from the mdx instance list.
|
||||
time.Sleep(2 * time.Minute)
|
||||
f(map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
})
|
||||
filter.MustStop()
|
||||
})
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user