mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-07-01 14:44:35 +03:00
Compare commits
67 Commits
vmestimato
...
testonly-m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd48160bc3 | ||
|
|
c82127b6d4 | ||
|
|
06bc808ddc | ||
|
|
a6927c46be | ||
|
|
15a4c31e87 | ||
|
|
54f9cd6edd | ||
|
|
2e16874e95 | ||
|
|
81d330f297 | ||
|
|
3278ddd170 | ||
|
|
cc790c2ea1 | ||
|
|
950f38fd6a | ||
|
|
ab9db9152f | ||
|
|
5b89f52c72 | ||
|
|
3608ab5b4c | ||
|
|
5ee1fa70c1 | ||
|
|
1b0e843e8f | ||
|
|
679646a3b3 | ||
|
|
bb3c038e2f | ||
|
|
8f32b6648f | ||
|
|
df5f11623f | ||
|
|
6c8a41f5ed | ||
|
|
e749a6ce8d | ||
|
|
615176ad55 | ||
|
|
3aec167f00 | ||
|
|
6f633e5654 | ||
|
|
50a827256a | ||
|
|
e30e8be1f4 | ||
|
|
24ac567a9f | ||
|
|
dce8193c16 | ||
|
|
e196479fb2 | ||
|
|
1c774564a2 | ||
|
|
e1c554d4a6 | ||
|
|
3419328f1c | ||
|
|
e841e45877 | ||
|
|
d53d8849e7 | ||
|
|
d3641394d9 | ||
|
|
53a8f4bd47 | ||
|
|
2b256952c9 | ||
|
|
12086e75de | ||
|
|
d426575622 | ||
|
|
a76b1ce0e3 | ||
|
|
5f49fb7f31 | ||
|
|
80d1104fca | ||
|
|
ae59c2624c | ||
|
|
4661f69d9f | ||
|
|
4d9901fbf4 | ||
|
|
9356c2111a | ||
|
|
45f0b87150 | ||
|
|
8480f6b43e | ||
|
|
61668f0672 | ||
|
|
d1ebbf573c | ||
|
|
16422b2d14 | ||
|
|
0f1ca87611 | ||
|
|
0dd2b2cee6 | ||
|
|
7caec5fcb4 | ||
|
|
612f8ac8d6 | ||
|
|
6aa31a09d7 | ||
|
|
b6e6a50e29 | ||
|
|
a6d48b6af3 | ||
|
|
dc4cf5631b | ||
|
|
005f133146 | ||
|
|
35fc595e6f | ||
|
|
710c920d60 | ||
|
|
0ceeb14076 | ||
|
|
adc29732f9 | ||
|
|
41ffe23b18 | ||
|
|
6229a8fe7d |
@@ -35,6 +35,9 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
|
||||
if len(extraLabels) == 0 && !prommetadata.IsEnabled() && at == nil {
|
||||
return insertRowsFast(at, timeseries)
|
||||
}
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
@@ -102,3 +105,17 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
|
||||
func insertRowsFast(at *auth.Token, timeseries []prompb.TimeSeries) error {
|
||||
rowsTotal := 0
|
||||
for i := range timeseries {
|
||||
rowsTotal += len(timeseries[i].Samples)
|
||||
}
|
||||
wr := &prompb.WriteRequest{Timeseries: timeseries}
|
||||
if !remotewrite.TryPush(at, wr) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -187,7 +187,7 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
func (c *client) init(argIdx int, sanitizedURL string) {
|
||||
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
@@ -204,11 +204,20 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(concurrency)
|
||||
})
|
||||
for range concurrency {
|
||||
c.wg.Go(c.runWorker)
|
||||
workers := queues.GetOptionalArg(argIdx)
|
||||
if workers <= 0 {
|
||||
workers = 1
|
||||
}
|
||||
inmemoryWorkers := inmemoryQueues.GetOptionalArg(argIdx)
|
||||
for range inmemoryWorkers {
|
||||
c.wg.Go(func() {
|
||||
c.runWorker(c.fq.MustReadInMemoryBlockBlocking)
|
||||
})
|
||||
}
|
||||
for range workers {
|
||||
c.wg.Go(func() {
|
||||
c.runWorker(c.fq.MustReadBlock)
|
||||
})
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
@@ -302,12 +311,12 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (c *client) runWorker() {
|
||||
func (c *client) runWorker(readBlock func(dst []byte) ([]byte, bool)) {
|
||||
var ok bool
|
||||
var block []byte
|
||||
ch := make(chan bool, 1)
|
||||
for {
|
||||
block, ok = c.fq.MustReadBlock(block[:0])
|
||||
block, ok = readBlock(block[:0])
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -12,19 +12,18 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mdx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -66,6 +65,9 @@ var (
|
||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
inmemoryQueues = flagutil.NewArrayInt("remoteWrite.inmemoryQueues", 0, "The number of additional workers per each -remoteWrite.url, which send only recently ingested data from the in-memory queue, "+
|
||||
"while the file-based queue at -remoteWrite.tmpDataPath is drained by workers configured via -remoteWrite.queues. "+
|
||||
"This reduces delivery lag for fresh samples when the file-based queue contains a backlog accumulated during remote storage outages.")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
@@ -103,6 +105,9 @@ var (
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
|
||||
enableMdx = flagutil.NewArrayBool("remoteWrite.mdx.enable", "Whether to only retain metrics from VictoriaMetrics services before sending them to the corresponding -remoteWrite.url. "+
|
||||
"Please see https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -159,8 +164,8 @@ func InitSecretFlags() {
|
||||
}
|
||||
|
||||
var (
|
||||
shardByURLLabelsMap map[string]struct{}
|
||||
shardByURLIgnoreLabelsMap map[string]struct{}
|
||||
shardByURLLabelsFilter []string
|
||||
shardByURLIgnoreLabelsFilter []string
|
||||
)
|
||||
|
||||
// Init initializes remotewrite.
|
||||
@@ -207,8 +212,8 @@ func Init() {
|
||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
||||
}
|
||||
shardByURLLabelsMap = newMapFromStrings(*shardByURLLabels)
|
||||
shardByURLIgnoreLabelsMap = newMapFromStrings(*shardByURLIgnoreLabels)
|
||||
shardByURLLabelsFilter = slices.Clone(*shardByURLLabels)
|
||||
shardByURLIgnoreLabelsFilter = slices.Clone(*shardByURLIgnoreLabels)
|
||||
|
||||
initLabelsGlobal()
|
||||
|
||||
@@ -304,6 +309,10 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
}
|
||||
fs.RegisterPathFsMetrics(*tmpDataPath)
|
||||
|
||||
if slices.Contains(*enableMdx, true) && *shardByURL {
|
||||
logger.Fatalf("-remoteWrite.mdx.enable and -remoteWrite.shardByURL cannot be set to true simultaneously.")
|
||||
}
|
||||
|
||||
if *shardByURL {
|
||||
consistentHashNodes := make([]string, 0, len(urls))
|
||||
for i, url := range urls {
|
||||
@@ -695,18 +704,18 @@ func shardAmountRemoteWriteCtx(tssBlock []prompb.TimeSeries, shards [][]prompb.T
|
||||
|
||||
for _, ts := range tssBlock {
|
||||
hashLabels := ts.Labels
|
||||
if len(shardByURLLabelsMap) > 0 {
|
||||
if len(shardByURLLabelsFilter) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLLabelsMap[label.Name]; ok {
|
||||
if slices.Contains(shardByURLLabelsFilter, label.Name) {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
tmpLabels.Labels = hashLabels
|
||||
} else if len(shardByURLIgnoreLabelsMap) > 0 {
|
||||
} else if len(shardByURLIgnoreLabelsFilter) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLIgnoreLabelsMap[label.Name]; !ok {
|
||||
if !slices.Contains(shardByURLIgnoreLabelsFilter, label.Name) {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
@@ -807,34 +816,26 @@ var (
|
||||
// it omits the '=' separator between label name and value for backward compatibility.
|
||||
// Changing it would re-shard all series across remoteWrite targets.
|
||||
func getLabelsHashForShard(labels []prompb.Label) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
var d xxhash.Digest
|
||||
d.Reset()
|
||||
for _, label := range labels {
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, label.Value...)
|
||||
_, _ = d.WriteString(label.Name)
|
||||
_, _ = d.WriteString(label.Value)
|
||||
}
|
||||
h := xxhash.Sum64(b)
|
||||
bb.B = b
|
||||
labelsHashBufPool.Put(bb)
|
||||
return h
|
||||
return d.Sum64()
|
||||
}
|
||||
|
||||
func getLabelsHash(labels []prompb.Label) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
var d xxhash.Digest
|
||||
d.Reset()
|
||||
for _, label := range labels {
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, '=')
|
||||
b = append(b, label.Value...)
|
||||
_, _ = d.WriteString(label.Name)
|
||||
_, _ = d.WriteString("=")
|
||||
_, _ = d.WriteString(label.Value)
|
||||
}
|
||||
h := xxhash.Sum64(b)
|
||||
bb.B = b
|
||||
labelsHashBufPool.Put(bb)
|
||||
return h
|
||||
return d.Sum64()
|
||||
}
|
||||
|
||||
var labelsHashBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func logSkippedSeries(labels []prompb.Label, flagName string, flagValue int) {
|
||||
select {
|
||||
case <-logSkippedSeriesTicker.C:
|
||||
@@ -859,6 +860,7 @@ type remoteWriteCtx struct {
|
||||
|
||||
sas atomic.Pointer[streamaggr.Aggregators]
|
||||
deduplicator *streamaggr.Deduplicator
|
||||
mdxFilter *mdx.Filter
|
||||
|
||||
streamAggrKeepInput bool
|
||||
streamAggrDropInput bool
|
||||
@@ -873,6 +875,7 @@ type remoteWriteCtx struct {
|
||||
|
||||
rowsPushedAfterRelabel *metrics.Counter
|
||||
rowsDroppedByRelabel *metrics.Counter
|
||||
mdxRowsPreserved *metrics.Counter
|
||||
|
||||
pushFailures *metrics.Counter
|
||||
metadataDroppedOnPushFailure *metrics.Counter
|
||||
@@ -906,7 +909,8 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx)
|
||||
inmemoryQueueSize := inmemoryQueues.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx) + inmemoryQueueSize
|
||||
if queuesSize > maxQueues {
|
||||
queuesSize = maxQueues
|
||||
} else if queuesSize <= 0 {
|
||||
@@ -923,7 +927,13 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||
fqOpts := persistentqueue.OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: maxInmemoryBlocks,
|
||||
MaxPendingBytes: maxPendingBytes,
|
||||
IsPQDisabled: isPQDisabled,
|
||||
PrioritizeInmemoryData: inmemoryQueueSize > 0,
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueueWithOpts(queuePath, sanitizedURL, fqOpts)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
@@ -936,6 +946,9 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
return 0
|
||||
})
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, sanitizedURL), func() float64 {
|
||||
return float64(queuesSize)
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
@@ -944,7 +957,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, queuesSize, sanitizedURL)
|
||||
c.init(argIdx, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArg(argIdx)
|
||||
@@ -959,7 +972,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq, &c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
@@ -976,6 +988,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
rwctx.initStreamAggrConfig()
|
||||
|
||||
if enableMdx.GetOptionalArg(argIdx) {
|
||||
mdxFilter := mdx.NewFilter()
|
||||
rwctx.mdxFilter = mdxFilter
|
||||
rwctx.mdxRowsPreserved = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_mdx_rows_preserved_total{path=%q,url=%q}`, queuePath, sanitizedURL))
|
||||
_ = metrics.NewGauge(fmt.Sprintf(`vmagent_remotewrite_mdx_tracked_instances{path=%q,url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(mdxFilter.VMInstancesCount())
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
@@ -989,6 +1011,11 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.deduplicator.MustStop()
|
||||
rwctx.deduplicator = nil
|
||||
}
|
||||
if rwctx.mdxFilter != nil {
|
||||
rwctx.mdxFilter.MustStop()
|
||||
rwctx.mdxFilter = nil
|
||||
rwctx.mdxRowsPreserved = nil
|
||||
}
|
||||
|
||||
for _, ps := range rwctx.pss {
|
||||
ps.MustStop()
|
||||
@@ -1004,6 +1031,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
rwctx.rowsPushedAfterRelabel = nil
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
|
||||
}
|
||||
|
||||
// TryPushTimeSeries sends tss series to the configured remote write endpoint
|
||||
@@ -1011,16 +1039,41 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
// TryPushTimeSeries doesn't modify tss, so tss can be passed concurrently to TryPush across distinct rwctx instances.
|
||||
func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) bool {
|
||||
var rctx *relabelCtx
|
||||
var mctx *mdx.Ctx
|
||||
var v *[]prompb.TimeSeries
|
||||
defer func() {
|
||||
if rctx == nil {
|
||||
return
|
||||
if v != nil {
|
||||
*v = prompb.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
}
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
}
|
||||
if mctx != nil {
|
||||
mdx.PutContext(mctx)
|
||||
}
|
||||
*v = prompb.ResetTimeSeries(tss)
|
||||
tssPool.Put(v)
|
||||
putRelabelCtx(rctx)
|
||||
}()
|
||||
|
||||
// copyTimeSeriesIfNeeded replaces tss with a copy backed by a slice taken
// from tssPool, unless such a copy has already been made during this call.
//
// It must assign to the outer v (plain `=`, not `:=`): the outer v is what
// marks the copy as done and what the deferred cleanup uses to reset the
// slice and return it to tssPool.
copyTimeSeriesIfNeeded := func() {
	if v == nil {
		v = tssPool.Get().(*[]prompb.TimeSeries)
		tss = append(*v, tss...)
	}
}
|
||||
|
||||
if rwctx.mdxFilter != nil {
|
||||
mctx = mdx.GetContext()
|
||||
// Make a copy of tss before applying the mdx filter in order to prevent it
|
||||
// from affecting time series for other remoteWrite.mdx configs.
|
||||
copyTimeSeriesIfNeeded()
|
||||
tss = rwctx.mdxFilter.Filter(mctx, tss)
|
||||
if len(tss) == 0 {
|
||||
return true
|
||||
}
|
||||
rowsCount := getRowsCount(tss)
|
||||
rwctx.mdxRowsPreserved.Add(rowsCount)
|
||||
}
|
||||
|
||||
// Apply relabeling
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcs := rcs.perURL[rwctx.idx]
|
||||
@@ -1030,8 +1083,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
copyTimeSeriesIfNeeded()
|
||||
rowsCountBeforeRelabel := getRowsCount(tss)
|
||||
tss = rctx.applyRelabeling(tss, pcs)
|
||||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
@@ -1049,8 +1101,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
copyTimeSeriesIfNeeded()
|
||||
}
|
||||
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
||||
} else if rwctx.streamAggrDropInput {
|
||||
@@ -1058,8 +1109,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
copyTimeSeriesIfNeeded()
|
||||
}
|
||||
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
|
||||
}
|
||||
@@ -1178,15 +1228,6 @@ func getRowsCount(tss []prompb.TimeSeries) int {
|
||||
}
|
||||
return rowsCount
|
||||
}
|
||||
|
||||
func newMapFromStrings(a []string) map[string]struct{} {
|
||||
m := make(map[string]struct{}, len(a))
|
||||
for _, s := range a {
|
||||
m[s] = struct{}{}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func getMaxHourlySeries() int {
|
||||
limit := *maxHourlySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
|
||||
@@ -145,10 +145,10 @@ func TestRuleValidate(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGroupValidate_Failure(t *testing.T) {
|
||||
f := func(group *Group, validateExpressions bool, errStrExpected string) {
|
||||
f := func(data []byte, validateExpressions bool, errStrExpected string) {
|
||||
t.Helper()
|
||||
|
||||
err := group.Validate(nil, validateExpressions)
|
||||
_, err := parse(map[string][]byte{"test.yaml": data}, nil, validateExpressions)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
@@ -158,275 +158,238 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
f(&Group{}, false, "group name must be set")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: ""
|
||||
`), false, "group name must be set")
|
||||
|
||||
f(&Group{
|
||||
Name: "both record and alert are not set",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
},
|
||||
},
|
||||
}, false, "invalid rule")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: both record and alert are not set
|
||||
rules:
|
||||
- expr: "sum(up == 0 ) by (host)"
|
||||
for: 10ms
|
||||
- expr: "sumSeries(time('foo.bar',10))"
|
||||
`), false, "invalid rule")
|
||||
|
||||
f(&Group{
|
||||
Name: "negative interval",
|
||||
Interval: promutil.NewDuration(-1),
|
||||
}, false, "interval shouldn't be lower than 0")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: negative interval
|
||||
interval: -1ms
|
||||
`), false, "interval shouldn't be lower than 0")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: too big eval_offset
|
||||
interval: 1m
|
||||
eval_offset: 2m
|
||||
`), false, "eval_offset should be smaller than interval")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big negative eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(-2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: too big negative eval_offset
|
||||
interval: 1m
|
||||
eval_offset: -2m
|
||||
`), false, "eval_offset should be smaller than interval")
|
||||
|
||||
limit := -1
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
Limit: &limit,
|
||||
}, false, "invalid limit")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: wrong limit
|
||||
limit: -1
|
||||
`), false, "invalid limit")
|
||||
|
||||
f(&Group{
|
||||
Name: "wrong concurrency",
|
||||
Concurrency: -1,
|
||||
}, false, "invalid concurrency")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: wrong concurrency
|
||||
concurrency: -1
|
||||
`), false, "invalid concurrency")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
`), false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "{{ value|query }}"
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "{{ value|query }}"
|
||||
`), false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Record: "record", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- record: record
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "{{ value|query }}"
|
||||
- record: record
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "{{ value|query }}"
|
||||
`), false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{Record: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, "duplicate")
|
||||
|
||||
f(&Group{
|
||||
Name: "test thanos",
|
||||
Type: NewRawType("thanos"),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, true, "unknown datasource type")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test thanos
|
||||
type: thanos
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
description: "{{ value|query }}"
|
||||
`), true, "unknown datasource type")
|
||||
|
||||
// validate expressions
|
||||
f(&Group{
|
||||
Name: "test prometheus expr",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, true, "bad MetricsQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test prometheus expr
|
||||
type: prometheus
|
||||
rules:
|
||||
- record: record
|
||||
expr: "up | 0"
|
||||
`), true, "bad MetricsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite expr",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
}, true, "bad GraphiteQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test graphite expr
|
||||
type: graphite
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
description: some-description
|
||||
`), true, "bad GraphiteQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "stats count(*) as requests"},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test vlogs expr
|
||||
type: vlogs
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: "stats count(*) as requests"
|
||||
`), true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "_time: 1m | stats by (path, _time: 1m) count(*) as requests"},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test vlogs expr multipart
|
||||
type: vlogs
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: "_time: 1m | stats by (path, _time: 1m) count(*) as requests"
|
||||
`), true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite with prometheus expr",
|
||||
Type: NewGraphiteType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
ID: 1,
|
||||
Expr: "sumSeries(time('foo.bar',10))",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
{
|
||||
Record: "r2",
|
||||
ID: 2,
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
},
|
||||
},
|
||||
}, true, "bad GraphiteQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test graphite with prometheus expr
|
||||
type: graphite
|
||||
rules:
|
||||
- record: r1
|
||||
expr: "sumSeries(time('foo.bar',10))"
|
||||
for: 10ms
|
||||
- record: r2
|
||||
expr: "sum(up == 0 ) by (host)"
|
||||
`), true, "bad GraphiteQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs with prometheus exp",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad LogsQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test vlogs with prometheus expr
|
||||
type: vlogs
|
||||
rules:
|
||||
- record: r1
|
||||
expr: "sum(up == 0 ) by (host)"
|
||||
for: 10ms
|
||||
`), true, "bad LogsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test prometheus with vlogs exp",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "r1",
|
||||
Expr: "* | stats by (path) count()",
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad MetricsQL expr")
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test prometheus with vlogs expr
|
||||
type: prometheus
|
||||
rules:
|
||||
- record: r1
|
||||
expr: "* | stats by (path) count()"
|
||||
for: 10ms
|
||||
`), true, "bad MetricsQL expr")
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
f := func(group *Group, validateAnnotations, validateExpressions bool) {
|
||||
f := func(data []byte, validateAnnotations, validateExpressions bool) {
|
||||
t.Helper()
|
||||
|
||||
var validateTplFn ValidateTplFn
|
||||
if validateAnnotations {
|
||||
validateTplFn = notifier.ValidateTemplates
|
||||
}
|
||||
err := group.Validate(validateTplFn, validateExpressions)
|
||||
_, err := parse(map[string][]byte{"test.yaml": data}, validateTplFn, validateExpressions)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Record: "record",
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, false, false)
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- record: record
|
||||
expr: "up | 0"
|
||||
`), false, false)
|
||||
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
},
|
||||
},
|
||||
},
|
||||
}, false, false)
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "{{ value|query }}"
|
||||
`), false, false)
|
||||
|
||||
// validate annotations
|
||||
f(&Group{
|
||||
Name: "test",
|
||||
Rules: []Rule{
|
||||
{
|
||||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": `
|
||||
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
|
||||
{{ . | first | value | humanize1024 }}B
|
||||
{{ end }}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
}, true, false)
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
summary: "\n{{ with printf \"node_memory_MemTotal{job='node',instance='%s'}\" \"localhost\" | query }}\n {{ . | first | value | humanize1024 }}B\n{{ end }}"
|
||||
`), true, false)
|
||||
|
||||
// validate expressions
|
||||
f(&Group{
|
||||
Name: "test prometheus",
|
||||
Type: NewPrometheusType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, true)
|
||||
f(&Group{
|
||||
Name: "test victorialogs",
|
||||
Type: NewVLogsType(),
|
||||
Rules: []Rule{
|
||||
{Alert: "alert", Expr: " _time: 1m | stats count(*) as requests", Labels: map[string]string{
|
||||
"description": "{{ value|query }}",
|
||||
}},
|
||||
},
|
||||
}, false, true)
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test prometheus
|
||||
type: prometheus
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: up == 1
|
||||
labels:
|
||||
description: "{{ value|query }}"
|
||||
`), false, true)
|
||||
|
||||
f([]byte(`
|
||||
groups:
|
||||
- name: test victorialogs
|
||||
type: vlogs
|
||||
rules:
|
||||
- alert: alert
|
||||
expr: " _time: 1m | stats count(*) as requests"
|
||||
labels:
|
||||
description: "{{ value|query }}"
|
||||
`), false, true)
|
||||
}
|
||||
|
||||
func TestHashRule_NotEqual(t *testing.T) {
|
||||
|
||||
@@ -457,12 +457,10 @@ func TestSetIntervalAsTimeFilter(t *testing.T) {
|
||||
f(`* | count()`, "vlogs", true)
|
||||
f(`error OR _time:5m | count()`, "vlogs", true)
|
||||
f(`(_time: 5m AND error) OR (_time: 5m AND warn) | count()`, "vlogs", true)
|
||||
f(`* | error OR _time:5m | count()`, "vlogs", true)
|
||||
|
||||
f(`_time:5m | count()`, "vlogs", false)
|
||||
f(`_time:2023-04-25T22:45:59Z | count()`, "vlogs", false)
|
||||
f(`error AND _time:5m | count()`, "vlogs", false)
|
||||
f(`* | error AND _time:5m | count()`, "vlogs", false)
|
||||
}
|
||||
|
||||
func TestRecordingRuleExec_Partial(t *testing.T) {
|
||||
|
||||
@@ -140,6 +140,18 @@ users:
|
||||
- "ProjectID: {{.MetricsProjectID}}"
|
||||
url_prefix: "http://vminsert:8480/insert/prometheus"
|
||||
|
||||
# JWT-based routing that relies solely on custom claims.
|
||||
# When the `vm_access` claim is missing from the token, the default claim from this config is used.
|
||||
# e.g. {"role": "admin"}.
|
||||
- name: jwt-custom-claims
|
||||
jwt:
|
||||
skip_verify: true
|
||||
default_vm_access_claim:
|
||||
metrics_account_id: 1
|
||||
match_claims:
|
||||
role: admin
|
||||
url_prefix: "http://vmselect-admin:8481/select/0/prometheus"
|
||||
|
||||
# Requests without Authorization header are proxied according to `unauthorized_user` section.
|
||||
# Requests are proxied in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the proxied requests.
|
||||
|
||||
@@ -65,6 +65,8 @@ type JWTConfig struct {
|
||||
MatchClaims map[string]string `yaml:"match_claims,omitempty"`
|
||||
parsedMatchClaims []*jwt.Claim
|
||||
|
||||
DefaultVMAccessClaim *jwt.VMAccessClaim `yaml:"default_vm_access_claim,omitempty"`
|
||||
|
||||
// verifierPool is used to verify JWT tokens.
|
||||
// It is initialized from PublicKeys and/or PublicKeyFiles.
|
||||
// In this case, it is initialized once at config reload and never updated until next reload
|
||||
@@ -432,7 +434,6 @@ func validateJWTPlaceholdersForURL(up *URLPrefix, isAllowed bool) error {
|
||||
}
|
||||
if strings.Contains(p, placeholderPrefix) {
|
||||
return fmt.Errorf("invalid placeholder found in URL request path: %q, supported values are: %s", bu.Path, strings.Join(allPlaceholders, ", "))
|
||||
|
||||
}
|
||||
}
|
||||
for param, values := range bu.Query() {
|
||||
@@ -487,7 +488,6 @@ func hasAnyPlaceholders(u *url.URL) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -190,6 +190,10 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if tkn == nil {
|
||||
logger.Panicf("BUG: unexpected nil jwt token for user %q", ui.name())
|
||||
}
|
||||
if !tkn.HasVMAccessClaim() && ui.JWT.DefaultVMAccessClaim == nil {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return true
|
||||
}
|
||||
defer putToken(tkn)
|
||||
processUserRequest(w, r, ui, tkn)
|
||||
return true
|
||||
@@ -424,8 +428,12 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *j
|
||||
}
|
||||
targetURL := bu.url
|
||||
if tkn != nil {
|
||||
vmac := tkn.VMAccess()
|
||||
if !tkn.HasVMAccessClaim() {
|
||||
vmac = ui.JWT.DefaultVMAccessClaim
|
||||
}
|
||||
// for security reasons allow templating only for configured url values and headers
|
||||
targetURL, hc = replaceJWTPlaceholders(bu, hc, tkn.VMAccess())
|
||||
targetURL, hc = replaceJWTPlaceholders(bu, hc, vmac)
|
||||
}
|
||||
if isDefault {
|
||||
// Don't change path and add request_path query param for default route.
|
||||
|
||||
@@ -739,6 +739,12 @@ users:
|
||||
"vm_access": map[string]any{},
|
||||
}, false)
|
||||
|
||||
// token without vm_access claim, but with a custom claim usable for routing
|
||||
roleToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"role": "admin",
|
||||
}, true)
|
||||
|
||||
fullToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"vm_access": map[string]any{
|
||||
@@ -779,6 +785,26 @@ statusCode=401
|
||||
Unauthorized`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// token without vm_access claim is accepted when default_vm_access_claim is configured
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+roleToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /foo/abc
|
||||
query:
|
||||
headers:`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
default_vm_access_claim:
|
||||
metrics_account_id: 10
|
||||
metrics_project_id: 10
|
||||
match_claims:
|
||||
role: admin
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyPEM)), request, responseExpected)
|
||||
|
||||
// expired token
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+expiredToken)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
See vmctl docs [here](https://docs.victoriametrics.com/victoriametrics/vmctl/).
|
||||
|
||||
vmctl docs can be edited at [docs/vmctl.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmctl.md).
|
||||
vmctl docs can be edited at [docs/vmctl.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmctl/vmctl.md).
|
||||
|
||||
@@ -259,7 +259,7 @@ func (cr *ChunkedResponse) Next() ([]int64, []float64, error) {
|
||||
|
||||
fieldValues, ok := r.values[cr.field]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain filed %q", cr.field)
|
||||
return nil, nil, fmt.Errorf("response doesn't contain field %q", cr.field)
|
||||
}
|
||||
values := make([]float64, len(fieldValues))
|
||||
for i, fv := range fieldValues {
|
||||
|
||||
@@ -563,11 +563,11 @@ func main() {
|
||||
}()
|
||||
|
||||
err = app.Run(os.Args)
|
||||
pushmetrics.StopAndPush()
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
pushmetrics.StopAndPush()
|
||||
}
|
||||
|
||||
func initConfigVM(c *cli.Context) (vm.Config, error) {
|
||||
|
||||
@@ -405,7 +405,16 @@ func buildMatchWithFilter(filter string, metricName string) (string, error) {
|
||||
if len(tf.Key) == 0 {
|
||||
continue
|
||||
}
|
||||
a = append(a, tf.String())
|
||||
switch {
|
||||
case tf.IsNegative && tf.IsRegexp:
|
||||
a = append(a, fmt.Sprintf("%s!~%q", tf.Key, tf.Value))
|
||||
case tf.IsNegative:
|
||||
a = append(a, fmt.Sprintf("%s!=%q", tf.Key, tf.Value))
|
||||
case tf.IsRegexp:
|
||||
a = append(a, fmt.Sprintf("%s=~%q", tf.Key, tf.Value))
|
||||
default:
|
||||
a = append(a, fmt.Sprintf("%s=%q", tf.Key, tf.Value))
|
||||
}
|
||||
}
|
||||
a = append(a, nameFilter)
|
||||
filters = append(filters, strings.Join(a, ","))
|
||||
|
||||
@@ -15,7 +15,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-m
|
||||
currentItem := 0
|
||||
%}
|
||||
{% for _, row := range result %}
|
||||
"{%s string(row.MetricFamilyName) %}": [
|
||||
{%q= string(row.MetricFamilyName) %}: [
|
||||
{
|
||||
"type": {%q= row.Type.String() %},
|
||||
{% if len(row.Unit) > 0 -%}
|
||||
|
||||
@@ -35,12 +35,10 @@ func StreamMetadataResponse(qw422016 *qt422016.Writer, result []*metricsmetadata
|
||||
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:17
|
||||
for _, row := range result {
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:17
|
||||
qw422016.N().S(`"`)
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:18
|
||||
qw422016.E().S(string(row.MetricFamilyName))
|
||||
qw422016.N().Q(string(row.MetricFamilyName))
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:18
|
||||
qw422016.N().S(`": [{"type":`)
|
||||
qw422016.N().S(`: [{"type":`)
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:20
|
||||
qw422016.N().Q(row.Type.String())
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:20
|
||||
|
||||
@@ -956,6 +956,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo
|
||||
start, end, step int64, r *http.Request, ct int64, etfs [][]storage.TagFilter) error {
|
||||
deadline := searchutil.GetDeadlineForQuery(r, startTime)
|
||||
mayCache := !httputil.GetBool(r, "nocache")
|
||||
optimizeRepeatedBinaryOpSubexprs := httputil.GetBool(r, "optimize_repeated_binary_op_subexprs")
|
||||
lookbackDelta, err := getMaxLookback(r)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -977,18 +978,19 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo
|
||||
}
|
||||
|
||||
ec := &promql.EvalConfig{
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: step,
|
||||
MaxPointsPerSeries: *maxPointsPerTimeseries,
|
||||
MaxSeries: 0, // let vmstorage use maxUniqueTimeseries by default
|
||||
QuotedRemoteAddr: httpserver.GetQuotedRemoteAddr(r),
|
||||
Deadline: deadline,
|
||||
MayCache: mayCache,
|
||||
LookbackDelta: lookbackDelta,
|
||||
RoundDigits: getRoundDigits(r),
|
||||
EnforcedTagFilterss: etfs,
|
||||
CacheTagFilters: etfs,
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: step,
|
||||
MaxPointsPerSeries: *maxPointsPerTimeseries,
|
||||
MaxSeries: 0, // let vmstorage use maxUniqueTimeseries by default
|
||||
QuotedRemoteAddr: httpserver.GetQuotedRemoteAddr(r),
|
||||
Deadline: deadline,
|
||||
MayCache: mayCache,
|
||||
OptimizeRepeatedBinaryOpSubexprs: optimizeRepeatedBinaryOpSubexprs,
|
||||
LookbackDelta: lookbackDelta,
|
||||
RoundDigits: getRoundDigits(r),
|
||||
EnforcedTagFilterss: etfs,
|
||||
CacheTagFilters: etfs,
|
||||
GetRequestURI: func() string {
|
||||
return httpserver.GetRequestURI(r)
|
||||
},
|
||||
|
||||
@@ -132,6 +132,9 @@ type EvalConfig struct {
|
||||
// Whether the response can be cached.
|
||||
MayCache bool
|
||||
|
||||
// Whether repeated cacheable binary op subexpressions can be optimized.
|
||||
OptimizeRepeatedBinaryOpSubexprs bool
|
||||
|
||||
// LookbackDelta is analog to `-query.lookback-delta` from Prometheus.
|
||||
LookbackDelta int64
|
||||
|
||||
@@ -171,6 +174,7 @@ func copyEvalConfig(src *EvalConfig) *EvalConfig {
|
||||
ec.MaxPointsPerSeries = src.MaxPointsPerSeries
|
||||
ec.Deadline = src.Deadline
|
||||
ec.MayCache = src.MayCache
|
||||
ec.OptimizeRepeatedBinaryOpSubexprs = src.OptimizeRepeatedBinaryOpSubexprs
|
||||
ec.LookbackDelta = src.LookbackDelta
|
||||
ec.RoundDigits = src.RoundDigits
|
||||
ec.EnforcedTagFilterss = src.EnforcedTagFilterss
|
||||
@@ -467,7 +471,8 @@ func isAggrFuncWithoutGrouping(e metricsql.Expr) bool {
|
||||
}
|
||||
|
||||
func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSecond metricsql.Expr, be *metricsql.BinaryOpExpr) ([]*timeseries, []*timeseries, error) {
|
||||
if !canPushdownCommonFilters(be) {
|
||||
canPushdown := canPushdownCommonFilters(be)
|
||||
if !canPushdown && !shouldOptimizeRepeatedBinaryOpSubexprs(ec, exprFirst, exprSecond) {
|
||||
// Execute exprFirst and exprSecond in parallel, since it is impossible to pushdown common filters
|
||||
// from exprFirst to exprSecond.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2886
|
||||
@@ -500,6 +505,25 @@ func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSec
|
||||
}
|
||||
return tssFirst, tssSecond, nil
|
||||
}
|
||||
if !canPushdown {
|
||||
qt = qt.NewChild("execute left and right sides of %q sequentially because repeated cacheable subexpression was found", be.Op)
|
||||
defer qt.Done()
|
||||
|
||||
qtFirst := qt.NewChild("expr1")
|
||||
tssFirst, err := evalExpr(qtFirst, ec, exprFirst)
|
||||
qtFirst.Done()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
qtSecond := qt.NewChild("expr2")
|
||||
tssSecond, err := evalExpr(qtSecond, ec, exprSecond)
|
||||
qtSecond.Done()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return tssFirst, tssSecond, nil
|
||||
}
|
||||
|
||||
// Execute binary operation in the following way:
|
||||
//
|
||||
@@ -544,6 +568,78 @@ func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSec
|
||||
return tssFirst, tssSecond, nil
|
||||
}
|
||||
|
||||
func shouldOptimizeRepeatedBinaryOpSubexprs(ec *EvalConfig, exprFirst, exprSecond metricsql.Expr) bool {
|
||||
if !ec.OptimizeRepeatedBinaryOpSubexprs {
|
||||
return false
|
||||
}
|
||||
if ec.Start == ec.End {
|
||||
return false
|
||||
}
|
||||
if !ec.mayCache() {
|
||||
return false
|
||||
}
|
||||
|
||||
candidatesFirst := make(map[string]struct{}, 1)
|
||||
var b []byte
|
||||
visitOptimizedAggrs(exprFirst, func(ae *metricsql.AggrFuncExpr) {
|
||||
if hasUnseededVolatileFunc(ae) {
|
||||
return
|
||||
}
|
||||
b = ae.AppendString(b[:0])
|
||||
candidatesFirst[string(b)] = struct{}{}
|
||||
})
|
||||
if len(candidatesFirst) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
repeated := false
|
||||
visitOptimizedAggrs(exprSecond, func(ae *metricsql.AggrFuncExpr) {
|
||||
if repeated {
|
||||
return
|
||||
}
|
||||
b = ae.AppendString(b[:0])
|
||||
_, repeated = candidatesFirst[string(b)]
|
||||
})
|
||||
return repeated
|
||||
}
|
||||
|
||||
func visitOptimizedAggrs(e metricsql.Expr, f func(ae *metricsql.AggrFuncExpr)) {
|
||||
metricsql.VisitAll(e, func(expr metricsql.Expr) {
|
||||
ae, ok := expr.(*metricsql.AggrFuncExpr)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if getIncrementalAggrFuncCallbacks(ae.Name) == nil {
|
||||
return
|
||||
}
|
||||
fe, _ := tryGetArgRollupFuncWithMetricExpr(ae)
|
||||
if fe == nil {
|
||||
return
|
||||
}
|
||||
f(ae)
|
||||
})
|
||||
}
|
||||
|
||||
func hasUnseededVolatileFunc(e metricsql.Expr) bool {
|
||||
found := false
|
||||
metricsql.VisitAll(e, func(expr metricsql.Expr) {
|
||||
if found {
|
||||
return
|
||||
}
|
||||
fe, ok := expr.(*metricsql.FuncExpr)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch strings.ToLower(fe.Name) {
|
||||
case "now":
|
||||
found = true
|
||||
case "rand", "rand_normal", "rand_exponential":
|
||||
found = len(fe.Args) == 0
|
||||
}
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
func getCommonLabelFilters(tss []*timeseries) []metricsql.LabelFilter {
|
||||
if len(tss) == 0 {
|
||||
return nil
|
||||
|
||||
@@ -170,3 +170,87 @@ func TestGetSumInstantValues(t *testing.T) {
|
||||
[]*timeseries{ts("foo", 100, 1)},
|
||||
)
|
||||
}
|
||||
|
||||
func TestShouldOptimizeRepeatedBinaryOpSubexprsGate(t *testing.T) {
|
||||
e, err := metricsql.Parse(`count(count(vm_requests_total) by (action,addr,cluster,endpoint)) by (action,addr,cluster) / count(count(vm_requests_total) by (action,addr,cluster,endpoint))`)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in metricsql.Parse(): %s", err)
|
||||
}
|
||||
be, ok := e.(*metricsql.BinaryOpExpr)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected expr type; got %T; want *metricsql.BinaryOpExpr", e)
|
||||
}
|
||||
|
||||
f := func(name string, ec *EvalConfig, resultExpected bool) {
|
||||
t.Helper()
|
||||
result := shouldOptimizeRepeatedBinaryOpSubexprs(ec, be.Left, be.Right)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %q; got %v; want %v", name, result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("disabled optimization", &EvalConfig{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Step: 1000,
|
||||
}, false)
|
||||
f("disabled cache", &EvalConfig{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Step: 1000,
|
||||
OptimizeRepeatedBinaryOpSubexprs: true,
|
||||
}, false)
|
||||
f("instant query", &EvalConfig{
|
||||
Start: 1000,
|
||||
End: 1000,
|
||||
Step: 1000,
|
||||
MayCache: true,
|
||||
OptimizeRepeatedBinaryOpSubexprs: true,
|
||||
}, false)
|
||||
f("repeated cacheable aggregate subexpression", &EvalConfig{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Step: 1000,
|
||||
MayCache: true,
|
||||
OptimizeRepeatedBinaryOpSubexprs: true,
|
||||
}, true)
|
||||
f("unaligned range query", &EvalConfig{
|
||||
Start: 1001,
|
||||
End: 2000,
|
||||
Step: 1000,
|
||||
MayCache: true,
|
||||
OptimizeRepeatedBinaryOpSubexprs: true,
|
||||
}, false)
|
||||
}
|
||||
|
||||
func TestShouldOptimizeRepeatedBinaryOpSubexprsExpressions(t *testing.T) {
|
||||
f := func(name, q string, resultExpected bool) {
|
||||
t.Helper()
|
||||
e, err := metricsql.Parse(q)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in metricsql.Parse(%q) for %q: %s", q, name, err)
|
||||
}
|
||||
be, ok := e.(*metricsql.BinaryOpExpr)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected expr type for %q; got %T; want *metricsql.BinaryOpExpr", name, e)
|
||||
}
|
||||
ec := &EvalConfig{Start: 1000, End: 2000, Step: 1000, MayCache: true, OptimizeRepeatedBinaryOpSubexprs: true}
|
||||
result := shouldOptimizeRepeatedBinaryOpSubexprs(ec, be.Left, be.Right)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %q; got %v; want %v; query: %q", name, result, resultExpected, q)
|
||||
}
|
||||
}
|
||||
|
||||
f("original issue query", `count(count(vm_requests_total) by (action,addr,cluster,endpoint)) by (action,addr,cluster) / count(count(vm_requests_total) by (action,addr,cluster,endpoint))`, true)
|
||||
f("right side contains repeated count aggregate", `count(foo) by (job) / (count(foo) by (job) + 1)`, true)
|
||||
f("same sum aggregate", `sum(rate(foo[5m])) by (job) / sum(rate(foo[5m])) by (job)`, true)
|
||||
f("same inner rollup but different aggregates", `sum(rate(foo[5m])) by (job) / count(rate(foo[5m])) by (job)`, false)
|
||||
f("different count aggregates", `count(foo) by (job) / count(bar) by (job)`, false)
|
||||
f("bare metric selector", `foo / foo`, false)
|
||||
f("bare rollup function", `rate(a[5m]) / rate(a[5m])`, false)
|
||||
f("now at modifier", `sum(rate(foo[5m] @ now())) by (job) / sum(rate(foo[5m] @ now())) by (job)`, false)
|
||||
f("unseeded rand at modifier", `sum(rate(foo[5m] @ rand())) by (job) / sum(rate(foo[5m] @ rand())) by (job)`, false)
|
||||
f("unseeded rand_normal at modifier", `sum(rate(foo[5m] @ rand_normal())) by (job) / sum(rate(foo[5m] @ rand_normal())) by (job)`, false)
|
||||
f("unseeded rand_exponential at modifier", `sum(rate(foo[5m] @ rand_exponential())) by (job) / sum(rate(foo[5m] @ rand_exponential())) by (job)`, false)
|
||||
f("seeded rand at modifier", `sum(rate(foo[5m] @ rand(1))) by (job) / sum(rate(foo[5m] @ rand(1))) by (job)`, true)
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
197
app/vmselect/vmui/assets/index-CusQvJzs.js
Normal file
197
app/vmselect/vmui/assets/index-CusQvJzs.js
Normal file
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
1
app/vmselect/vmui/assets/rolldown-runtime-Cyuzqnbw.js
Normal file
1
app/vmselect/vmui/assets/rolldown-runtime-Cyuzqnbw.js
Normal file
@@ -0,0 +1 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||(e((t={exports:{}}).exports,t),e=null),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
78
app/vmselect/vmui/assets/vendor-B83wxFqK.js
Normal file
78
app/vmselect/vmui/assets/vendor-B83wxFqK.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -37,9 +37,9 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-CoGukb-x.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-COnpUsM8.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-C8Kwp93_.js">
|
||||
<script type="module" crossorigin src="./assets/index-CusQvJzs.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-Cyuzqnbw.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-B83wxFqK.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-CnsZ1jie.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-BBUnmLOr.css">
|
||||
</head>
|
||||
|
||||
@@ -6,7 +6,7 @@ COPY web/ /build/
|
||||
RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o web-amd64 github.com/VictoriMetrics/vmui/ && \
|
||||
GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o web-windows github.com/VictoriMetrics/vmui/
|
||||
|
||||
FROM alpine:3.23.4
|
||||
FROM alpine:3.24.1
|
||||
USER root
|
||||
|
||||
COPY --from=build-web-stage /build/web-amd64 /app/web
|
||||
|
||||
@@ -4,7 +4,7 @@ The `apptest` package contains the integration tests for the VictoriaMetrics
|
||||
applications (such as vmstorage, vminsert, and vmselect).
|
||||
|
||||
An integration test aims at verifying the behavior of an application as a whole,
|
||||
as apposed to a unit test that verifies the behavior of a building block of an
|
||||
as opposed to a unit test that verifies the behavior of a building block of an
|
||||
application.
|
||||
|
||||
To achieve that an integration test starts an application in a separate process
|
||||
@@ -19,10 +19,10 @@ work together as a system.
|
||||
The package provides a collection of helpers to start applications and make
|
||||
queries to them:
|
||||
|
||||
- `app.go` - contains the generic code for staring an application and should
|
||||
- `app.go` - contains the generic code for starting an application and should
|
||||
not be used by integration tests directly.
|
||||
- `{vmstorage,vminsert,etc}.go` - build on top of `app.go` and provide the
|
||||
code for staring a specific application.
|
||||
code for starting a specific application.
|
||||
- `client.go` - provides helper functions for sending HTTP requests to
|
||||
applications.
|
||||
|
||||
@@ -36,7 +36,7 @@ the application binary files to be built and put into the `bin` directory. The
|
||||
build rule used for running integration tests, `make apptest`,
|
||||
accounts for that: it builds all application binaries before running the tests.
|
||||
But if you want to run the tests without `make`, i.e. by executing
|
||||
`go test ./app/apptest`, you will need to build the binaries first (for example,
|
||||
`go test ./apptest/tests`, you will need to build the binaries first (for example,
|
||||
by executing `make all`).
|
||||
|
||||
Not all binaries can be built from `master` branch, cluster binaries can be built
|
||||
|
||||
@@ -45,11 +45,13 @@ func TestSingleMetricsMetadata(t *testing.T) {
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_4"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_5"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_6"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: `metric_name_7_!@"_suffix`}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
},
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_5", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
{MetricFamilyName: `metric_name_7_!@"_suffix`, Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -59,12 +61,13 @@ func TestSingleMetricsMetadata(t *testing.T) {
|
||||
expected := &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
`metric_name_7_!@"_suffix`: {{Help: "some help message", Type: "stateset"}},
|
||||
},
|
||||
}
|
||||
gotStats := sut.PrometheusAPIV1Metadata(t, "", 0, apptest.QueryOpts{})
|
||||
@@ -154,11 +157,13 @@ func TestClusterMetricsMetadata(t *testing.T) {
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_4"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_5"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_6"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: `metric_name_7_!@"_suffix`}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
},
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_5", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
{MetricFamilyName: `metric_name_7_!@"_suffix`, Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -171,12 +176,13 @@ func TestClusterMetricsMetadata(t *testing.T) {
|
||||
expected := &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
`metric_name_7_!@"_suffix`: {{Help: "some help message", Type: "stateset"}},
|
||||
},
|
||||
}
|
||||
gotStats := vmselect.PrometheusAPIV1Metadata(t, "", 0, apptest.QueryOpts{Tenant: tenantID})
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -332,13 +333,11 @@ func TestSingleVMAgentDropOnOverload(t *testing.T) {
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 1 && vmagent.RemoteWriteRequests(t, url2) == 1
|
||||
},
|
||||
)
|
||||
|
||||
// Send 2 more requests, the first RW endpoint should receive everything, the second should add them to the queue
|
||||
// since worker is busy with the first request.
|
||||
for i := range 2 {
|
||||
@@ -641,3 +640,116 @@ func TestSingleVMAgentMultitenancy(t *testing.T) {
|
||||
t.Fatalf("expected vmagent_tenant_inserted_rows_total to have value 1 for accountID=5, projectID=0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSingleVMAgentPriorizeRecentData(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
remoteWriteSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}))
|
||||
defer remoteWriteSrv.Close()
|
||||
|
||||
var mustRW2ReturnError atomic.Bool
|
||||
mustRW2ReturnError.Store(true)
|
||||
|
||||
remoteWriteSrv2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if mustRW2ReturnError.Load() {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}))
|
||||
defer remoteWriteSrv2.Close()
|
||||
|
||||
vmagent := tc.MustStartDefaultRWVmagent("vmagent", []string{
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv.URL),
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv2.URL),
|
||||
"-remoteWrite.disableOnDiskQueue=true",
|
||||
// use only 1 worker to get a full queue faster
|
||||
"-remoteWrite.queues=1",
|
||||
"-remoteWrite.flushInterval=1ms",
|
||||
"-remoteWrite.inmemoryQueues=1",
|
||||
// fastqueue size is roughly memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||
// Use very large maxRowsPerBlock to get fastqueue of minimal length(2).
|
||||
// See initRemoteWriteCtxs function in remotewrite.go for details.
|
||||
"-remoteWrite.maxRowsPerBlock=1000000000",
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent",
|
||||
|
||||
// Delay retry logic to avoid race conditions with waitFor assertions.
|
||||
// It improves the test stability on resource-constrained runners.
|
||||
"-remoteWrite.retryMinInterval=3s",
|
||||
"-remoteWrite.retryMaxTime=3s",
|
||||
})
|
||||
|
||||
const (
|
||||
retries = 20
|
||||
period = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
waitFor := func(f func() bool) {
|
||||
t.Helper()
|
||||
for range retries {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out waiting for retry #%d", retries)
|
||||
}
|
||||
|
||||
// Real remote write URLs are hidden in metrics
|
||||
url1 := "1:secret-url"
|
||||
url2 := "2:secret-url"
|
||||
|
||||
// Wait until first request got flushed to remote write server
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 1 && vmagent.RemoteWriteRequests(t, url2) == 1
|
||||
},
|
||||
)
|
||||
// Wait until second request got flushed to remote write server
|
||||
// since there are 2 independent queues (general and in-memory) with minimal capacity of 1
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 2 && vmagent.RemoteWriteRequests(t, url2) == 2
|
||||
},
|
||||
)
|
||||
// Send 2 more requests, the first RW endpoint should receive everything, the second should add them to the queue
|
||||
// since worker is busy with the first request.
|
||||
for i := range 2 {
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 3+i && vmagent.RemoteWritePendingInmemoryBlocks(t, url2) == 1+i
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// Send one more request.
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 5 && vmagent.RemoteWriteSamplesDropped(t, url2) > 0
|
||||
},
|
||||
)
|
||||
mustRW2ReturnError.Store(false)
|
||||
// ensure that inmemory data correctly flushed to the remote write
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWritePendingInmemoryBlocks(t, url2) == 0
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2083,7 +2083,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2388,7 +2388,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2084,7 +2084,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2389,7 +2389,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2165,7 +2165,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6201,7 +6201,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The rate of ignored samples during aggregation. \nStream aggregation will drop samples with NaN values, or samples with too old timestamps. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"description": "The rate of dropped samples during aggregation. \nStream aggregation will drop samples with NaN values, too old timestamps or samples identified as duplicates during deduplication. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6282,14 +6282,14 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_streamaggr_ignored_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"expr": "sum(rate({__name__=~\"vm_streamaggr_ignored_samples_total|vm_streamaggr_dedup_dropped_samples_total\", job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Ignored samples ($instance)",
|
||||
"title": "Dropped samples ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1840,7 +1840,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2164,7 +2164,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6200,7 +6200,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The rate of ignored samples during aggregation. \nStream aggregation will drop samples with NaN values, or samples with too old timestamps. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"description": "The rate of dropped samples during aggregation. \nStream aggregation will drop samples with NaN values, too old timestamps or samples identified as duplicates during deduplication. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6281,14 +6281,14 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_streamaggr_ignored_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"expr": "sum(rate({__name__=~\"vm_streamaggr_ignored_samples_total|vm_streamaggr_dedup_dropped_samples_total\", job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Ignored samples ($instance)",
|
||||
"title": "Dropped samples ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1839,7 +1839,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
DOCKER_REGISTRIES ?= docker.io quay.io
|
||||
DOCKER_NAMESPACE ?= victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= alpine:3.23.4
|
||||
ROOT_IMAGE ?= alpine:3.24.1
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.23.4
|
||||
CERTS_IMAGE := alpine:3.24.1
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.26.4
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -42,14 +42,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -68,7 +68,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -90,7 +90,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -105,7 +105,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.145.0
|
||||
image: victoriametrics/vmauth:v1.146.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -119,7 +119,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.7
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -32,6 +32,17 @@ docs-image:
|
||||
--platform $(DOCKER_PLATFORM) \
|
||||
vmdocs
|
||||
|
||||
docs-check-links: docs-image
|
||||
rm -rf vmdocs/public
|
||||
docker run \
|
||||
--rm \
|
||||
--platform $(DOCKER_PLATFORM) \
|
||||
-v ./vmdocs:/opt/docs \
|
||||
$(shell for d in ./docs/*/; do printf ' -v %s:/opt/docs/content/%s' "$${d}" "$$(basename $${d})"; done) \
|
||||
--entrypoint /bin/sh \
|
||||
vmdocs-docker-package \
|
||||
-c "yarn install && hugo --minify && yarn run check-links"
|
||||
|
||||
docs-debug: docs docs-image
|
||||
docker run \
|
||||
--rm \
|
||||
|
||||
@@ -14,6 +14,24 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.7
|
||||
Released: 2026-06-25
|
||||
|
||||
- UI: updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.7.1](https://docs.victoriametrics.com/anomaly-detection/ui/#v171) to [v1.7.2](https://docs.victoriametrics.com/anomaly-detection/ui/#v172), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v172) for details. Notable mentions include `api/v1/server/model` endpoint for accessing production models config and queries from UI, manually or through [AI assistant](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance).
|
||||
|
||||
- IMPROVEMENT: Increased high-cardinality inference scaling by optionally scattering periodic infer jobs to reduce contention on shared resources (e.g. datasource, CPU, RAM) when `settings.n_workers > 1` and `scheduler.infer_every` is smaller than the total time to fetch and process all queries. This is controlled by new `scatter_infer_jobs` boolean argument of [Periodic Scheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#parameters-1) (default: `false`).
|
||||
|
||||
- IMPROVEMENT: Optimized internal batching for reader post-fetch series processing, exposing reader processing queue depth (`vmanomaly_reader_processing_tasks_queued` [metric](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#reader-behaviour-metrics)), and clarifying inference skip logs after data fetch timeouts. See `series_processing_batch_size` argument of [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) and [VLogsReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader) for details.
|
||||
|
||||
- IMPROVEMENT: Refined `VmReader` and `VLogsReader` logging after datasource request failures by suppressing the follow-up generic "No data" or "No unseen data" warning for failed fetches. Failed requests now keep the original datasource error while empty successful responses still emit the no-data warning.
|
||||
|
||||
## v1.29.6
|
||||
Released: 2026-06-17
|
||||
|
||||
- BUGFIX: Fixed `VLogsReader` startup and query execution when `tenant_id` is omitted or provided in short account-only form such as `"0"`. Omitted or empty tenant IDs are treated as single-node/no-tenant mode, and account-only tenant IDs are expanded to `accountID:0` before adding VictoriaLogs `AccountID`/`ProjectID` params or VM tenant labels.
|
||||
|
||||
- BUGFIX: Hardened [`OnlineMADModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-mad) anomaly scoring for perfectly constant time series (all values identical). The model now keeps a small deterministic prediction interval when the learned MAD is zero, so values deviating from an unknown constant baseline can produce `anomaly_score > 1` (previously, all anomaly scores were `0`).
|
||||
|
||||
## v1.29.5
|
||||
Released: 2026-06-11
|
||||
|
||||
|
||||
@@ -423,7 +423,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -641,7 +641,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5 && docker image tag victoriametrics/vmanomaly:v1.29.5 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.29.7 && docker image tag victoriametrics/vmanomaly:v1.29.7 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,7 +45,7 @@ There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | [v1.29.5](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1295) | Fully Compatible | - |
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | [v1.29.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1297) | Fully Compatible | - |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Partially compatible* | Dumped models of class [prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [seasonal quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) have problems with loading to [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) due to dropped `pytz` library. **Upgrading directly from v1.28.7 to [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) with a fix is suggested** |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
|
||||
@@ -132,7 +132,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5
|
||||
docker pull victoriametrics/vmanomaly:v1.29.7
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -152,7 +152,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -169,7 +169,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -182,7 +182,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -267,6 +267,7 @@ schedulers:
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler
|
||||
class: 'periodic'
|
||||
infer_every: '5m'
|
||||
scatter_infer_jobs: true
|
||||
fit_every: '1d'
|
||||
fit_window: '4w'
|
||||
|
||||
@@ -298,6 +299,7 @@ reader:
|
||||
datasource_url: "https://play.victoriametrics.com/" # [YOUR_DATASOURCE_URL]
|
||||
tenant_id: '0:0'
|
||||
sampling_period: "5m"
|
||||
series_processing_batch_size: 8 # number of time series to process together while preparing data for fit or infer stages
|
||||
queries:
|
||||
# define your queries with MetricsQL - https://docs.victoriametrics.com/victoriametrics/metricsql/
|
||||
cpu_user:
|
||||
@@ -413,11 +415,13 @@ For optimal service behavior, consider the following tweaks when configuring `vm
|
||||
- Configure the **inference frequency** in the [scheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) section of the configuration file.
|
||||
- Ensure that `infer_every` aligns with your **minimum required alerting frequency**.
|
||||
- For example, if receiving **alerts every 15 minutes** is sufficient (when `anomaly_score > 1`), set `infer_every` to match `reader.sampling_period` or override it per query via `reader.queries.query_xxx.step` for an optimal setup.
|
||||
- Set `scheduler.scatter_infer_jobs` {{% available_from "v1.29.7" anomaly %}} [arg](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#parameters-1) to `true` to allow for equal distribution of inference jobs across `infer_every` intervals, which can further enhance parallel processing efficiency and reduce resource contention when `reader.queries` contains a large number of queries.
|
||||
|
||||
**Reader**:
|
||||
- Set up the datasource to read data from in the [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/) section. Include the tenant ID if using a [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) (the `multitenant` value {{% available_from "v1.16.2" anomaly %}} can also be used here).
|
||||
- Define queries for input data using [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) under `reader.queries` section. Note, it's possible to override reader-level arguments at query level for increased flexibility, e.g. specifying per-query [timezone](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-timezones) or [sampling period](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters).
|
||||
- For longer `fit_window` intervals in scheduler, consider splitting queries into smaller time ranges to avoid excessive memory usage, timeouts and hitting server-side constraints, so they can be queried separately and reconstructed on `vmanomaly` side. Please refer to this [example](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-large-queries-in-vmanomaly) for more details.
|
||||
- Set `reader.series_processing_batch_size` {{% available_from "v1.29.7" anomaly %}} [arg](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) to a reasonable value (4-16, default is 8) to balance between memory usage and processing speed when preparing data for fit or infer stages.
|
||||
|
||||
> If applicable - consider [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader) {{% available_from "v1.26.0" anomaly %}} to perform anomaly detection on **log-derived metrics**. This is particularly useful for scenarios where log data needs to be analyzed for unusual patterns or behaviors, such as error rates or request latencies.
|
||||
|
||||
|
||||
@@ -315,7 +315,7 @@ docker run -it --rm \
|
||||
-e VMANOMALY_MCP_SERVER_URL=http://mcp-vmanomaly:8081/mcp \
|
||||
-p 8080:8080 \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.7 \
|
||||
vmanomaly_config.yaml
|
||||
```
|
||||
|
||||
@@ -640,6 +640,21 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.7.2
|
||||
Released: 2026-06-25
|
||||
|
||||
vmanomaly version: [v1.29.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1297)
|
||||
|
||||
- FEATURE: Added controls for selecting server-configured scheduled models (drop-down inside [model wizard](#model-panel)) and browsing scheduled queries from the running vmanomaly instance ("Queries" button, "scheduled queries" tab).
|
||||
|
||||
- IMPROVEMENT: Surfaced datasource fetch failures from ad-hoc VMUI raw queries as query-level errors instead of returning a successful empty result that triggers a generic "No match" warning. Now the user can see the actual error message from the datasource (e.g. "unauthorized", "not found", etc.) and take appropriate action.
|
||||
|
||||
- BUGFIX: Fixed [UI/query-server](#settings-panel) handling of VictoriaMetrics datasource URLs that already include `/select/multitenant/prometheus`. Such URLs are now recognized as cluster datasource URLs, preserving the multitenant path when proxying VMUI requests and allowing `server.use_reader_connection_settings` to reuse [configured reader credentials for authenticated datasources](#authentication).
|
||||
|
||||
- BUGFIX: Fixed [settings](#settings-panel) inputs for server and datasource URLs so editing, deleting, or pasting text is no longer immediately reverted to the previous value before applying changes.
|
||||
|
||||
- BUGFIX: Fixed [model wizard](#model-panel) settings for [`IsolationForestModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#isolation-forest-multivariate) `contamination`, allowing decimal float values such as `0.1` or `0,1` to be typed or pasted without being collapsed to `0`, while preserving the `"auto"` value.
|
||||
|
||||
### v1.7.1
|
||||
Released: 2026-06-11
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ schedulers:
|
||||
periodic_online: # alias
|
||||
class: 'periodic' # scheduler class
|
||||
infer_every: "30s" # how often to produce anomaly scores for new data
|
||||
scatter_infer_jobs: true # distribute infer jobs evenly across the infer interval to reduce synchronized bursts
|
||||
fit_every: "365d" # how often to re-fit the models, for online models used effectively once, then they are updated with new data and won't require re-fit
|
||||
fit_window: "3d" # how much historical data to use for fit stage
|
||||
start_from: "00:00" # start from specified time, i.e. 00:00 given timezone and do daily fits as `fit_every` is 1 day
|
||||
@@ -56,6 +57,7 @@ schedulers:
|
||||
periodic_offline_1w:
|
||||
class: 'periodic'
|
||||
infer_every: "15m"
|
||||
scatter_infer_jobs: true
|
||||
fit_every: "24h"
|
||||
fit_window: "14d"
|
||||
# if no start_from is specified, jobs will start immediately after service starts
|
||||
@@ -135,6 +137,7 @@ server:
|
||||
port: 8490
|
||||
path_prefix: '/vmanomaly' # optional path prefix for all HTTP routes
|
||||
max_concurrent_tasks: 4 # maximum number of concurrent anomaly detection tasks processed by backend
|
||||
use_reader_connection_settings: True # if True, use reader's datasource_url and credentials for UI requests to datasource
|
||||
uvicorn_config: # optional Uvicorn server configuration
|
||||
log_level: 'warning'
|
||||
```
|
||||
|
||||
@@ -1265,7 +1265,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5
|
||||
docker pull victoriametrics/vmanomaly:v1.29.7
|
||||
```
|
||||
|
||||
Now we can run the Docker container, mounting both the config and the model file as volumes:
|
||||
@@ -1279,7 +1279,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.5 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.7 /config.yaml \
|
||||
--licenseFile=/license \
|
||||
--watch
|
||||
```
|
||||
|
||||
@@ -458,6 +458,21 @@ Label names [description](#labelnames)
|
||||
<td>The total number of datapoints received from VictoriaMetrics for the `query_key` query within the specified scheduler `scheduler_alias`, in the `vmanomaly` service running in `preset` mode.</td>
|
||||
<td>
|
||||
|
||||
`url`, `query_key`, `scheduler_alias`, `preset`
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
<span style="white-space: nowrap;">`vmanomaly_reader_processing_tasks_queued`</span>
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`Gauge`
|
||||
</td>
|
||||
<td>The total number of queued processing tasks {{% available_from "v1.29.7" anomaly %}} (timeseries batches of size `series_processing_batch_size`) for the `query_key` query within the specified scheduler `scheduler_alias`, in the `vmanomaly` service running in `preset` mode. If continuously >0, it may lead to skipped infer runs due to resource contention and timeouts.</td>
|
||||
<td>
|
||||
|
||||
`url`, `query_key`, `scheduler_alias`, `preset`
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
@@ -421,7 +421,20 @@ Optional argument{{% available_from "v1.18.1" anomaly %}} allows defining **vali
|
||||
`60s`
|
||||
</td>
|
||||
<td>
|
||||
Optional argument{{% available_from "v1.25.3" anomaly %}} allows specifying a time offset for all queries in `queries`. Defaults to `0s` (0) if not set and can be overridden on a [per-query basis](#per-query-parameters).
|
||||
Optional argument {{% available_from "v1.25.3" anomaly %}}, allows specifying a time offset for all queries in `queries`. Defaults to `0s` (0) if not set and can be overridden on a [per-query basis](#per-query-parameters).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
<span style="white-space: nowrap;">`series_processing_batch_size`</span>
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`8`
|
||||
</td>
|
||||
<td>
|
||||
Optional argument {{% available_from "v1.29.7" anomaly %}}, allows specifying the number of time series to process together while preparing data for fit or infer stages. Defaults to `8`. Suggested values are 4-16 for high-cardinality queries.
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
@@ -450,6 +463,7 @@ reader:
|
||||
sampling_period: '1m'
|
||||
query_from_last_seen_timestamp: True # false by default
|
||||
latency_offset: '1ms'
|
||||
series_processing_batch_size: 8
|
||||
```
|
||||
|
||||
### MetricsQL Playground
|
||||
@@ -879,6 +893,19 @@ If a path to a CA bundle file (like `ca.crt`), it will verify the certificate us
|
||||
(Optional) Password for authentication. If set, it will be used to authenticate the request.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
<span style="white-space: nowrap;">`series_processing_batch_size`</span>
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`8`
|
||||
</td>
|
||||
<td>
|
||||
Optional argument {{% available_from "v1.29.7" anomaly %}}, allows specifying the number of time series to process together while preparing data for fit or infer stages. Defaults to `8`. Suggested values are 4-16 for high-cardinality queries.
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -897,6 +924,7 @@ reader:
|
||||
# tenant_id: '0:0' # for cluster version only
|
||||
sampling_period: '1m'
|
||||
max_points_per_query: 10000
|
||||
series_processing_batch_size: 8
|
||||
data_range: [0, 'inf'] # reader-level
|
||||
offset: '0s' # reader-level
|
||||
timeout: '30s'
|
||||
|
||||
@@ -74,40 +74,7 @@ options={`"scheduler.periodic.PeriodicScheduler"`, `"scheduler.oneoff.OneoffSche
|
||||
|
||||
### Parameters
|
||||
|
||||
For periodic scheduler parameters are defined as differences in times, expressed in difference units, e.g. days, hours, minutes, seconds.
|
||||
|
||||
Examples: `"50s"`, `"4m"`, `"3h"`, `"2d"`, `"1w"`.
|
||||
|
||||
<table class="params">
|
||||
<thead>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th>Time granularity</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>s</td>
|
||||
<td>seconds</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>m</td>
|
||||
<td>minutes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>h</td>
|
||||
<td>hours</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>d</td>
|
||||
<td>days</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>w</td>
|
||||
<td>weeks</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
For the periodic scheduler, parameters are defined as time differences expressed in different units, e.g. days, hours, minutes, seconds. The time granularity is defined by the last character of the string. Examples: `"50s"` (seconds), `"4m"` (minutes), `"3h"` (hours), `"2d"` (days), `"1w"` (weeks).
|
||||
|
||||
<table class="params">
|
||||
<thead>
|
||||
@@ -188,6 +155,21 @@ Specifies when to initiate the first `fit_every` call. Accepts either an ISO 860
|
||||
Defines the local timezone for the `start_from` parameter, if specified. Defaults to `UTC` if no timezone is provided.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
<span style="white-space: nowrap;">`scatter_infer_jobs`{{% available_from "v1.29.7" anomaly %}}</span>
|
||||
</td>
|
||||
<td>bool, <span style="white-space: nowrap;">Optional</span></td>
|
||||
<td>
|
||||
|
||||
`true` or `false`
|
||||
</td>
|
||||
<td>
|
||||
|
||||
If `true`, distribute infer jobs and their dependent data-fetch jobs evenly across the infer interval. This reduces synchronized read and inference bursts for high-scale configurations. Defaults to `false`. Useful when `settings.n_workers > 1`, `reader.queries` cardinality is high, and `scheduler.infer_every` is small.
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -200,6 +182,7 @@ schedulers:
|
||||
# (or class: "scheduler.periodic.PeriodicScheduler" for versions before v1.13.0, without class alias support)
|
||||
fit_window: "14d"
|
||||
infer_every: "1m"
|
||||
scatter_infer_jobs: true # Distribute infer jobs evenly across the infer interval to reduce synchronized bursts.
|
||||
fit_every: "1h"
|
||||
start_from: "20:00" # If launched before 20:00 (local Kyiv time), the first run starts today at 20:00. Otherwise, it starts tomorrow at 20:00.
|
||||
tz: "Europe/Kyiv" # Defaults to 'UTC' if not specified.
|
||||
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.145.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.145.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.145.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.146.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.146.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.146.0)
|
||||
- [Grafana](https://grafana.com/) (v12.2.0)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.9.1) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.28.1)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.7
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -240,23 +240,23 @@ vmagent will write data into VictoriaMetrics single-node and cluster (with tenan
|
||||
# compose.yaml
|
||||
services:
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -308,7 +308,7 @@ Now add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: docker.io/victoriametrics/vmauth:v1.145.0
|
||||
image: docker.io/victoriametrics/vmauth:v1.146.0
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
|
||||
@@ -155,15 +155,15 @@ These services will store and query the metrics scraped by vmagent.
|
||||
# compose.yaml
|
||||
services:
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
ports:
|
||||
@@ -196,7 +196,7 @@ Add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.145.0-enterprise
|
||||
image: victoriametrics/vmauth:v1.146.0-enterprise
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
@@ -251,7 +251,7 @@ Add the vmagent service to `compose.yaml` with OAuth2 configuration:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
|
||||
@@ -107,7 +107,7 @@ The final piece is the Docker Compose file. This ties all the services together
|
||||
# compose.yml
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
command:
|
||||
- "--storageDataPath=/victoria-metrics-data"
|
||||
- "--selfScrapeInterval=10s"
|
||||
@@ -128,7 +128,7 @@ services:
|
||||
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
- alertmanager
|
||||
|
||||
@@ -19,6 +19,7 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [Datanami: Why Roblox Picked VictoriaMetrics for Observability Data Overhaul](https://www.hpcwire.com/bigdatawire/2023/05/30/why-roblox-picked-victoriametrics-for-observability-data-overhaul/)
|
||||
* [Cloudflare: Introducing notifications for HTTP Traffic Anomalies](https://blog.cloudflare.com/introducing-http-traffic-anomalies-notifications/)
|
||||
* [Grammarly: Better, Faster, Cheaper: How Grammarly Improved Monitoring by Over 10x with VictoriaMetrics](https://www.grammarly.com/blog/engineering/monitoring-with-victoriametrics/)
|
||||
* [Xata: How we rebuilt PostgreSQL branch metrics on VictoriaMetrics, per cell](https://xata.io/blog/how-we-rebuilt-postgresql-branch-metrics-on-victoriametrics-per-cell)
|
||||
* [CERN: CMS monitoring R&D: Real-time monitoring and alerts](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
|
||||
* [CERN: The CMS monitoring infrastructure and applications](https://arxiv.org/pdf/2007.03630.pdf)
|
||||
* [Forbes: The (Almost) Infinitely Scalable Open Source Monitoring Dream](https://www.forbes.com/sites/adrianbridgwater/2022/08/16/the-almost-infinitely-scalable-open-source-monitoring-dream/)
|
||||
|
||||
@@ -85,6 +85,21 @@ Pull requests requirements:
|
||||
|
||||
See a good example of a [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6487).
|
||||
|
||||
## AI policy
|
||||
|
||||
You are free to use any AI tools when working on a contribution, on code,
|
||||
documentation, issues, or anything else. You do not need to disclose whether or
|
||||
how you used them.
|
||||
|
||||
With or without the help of AI, you are responsible for the changes you submit.
|
||||
Take the effort to understand the code base and every change in your pull request,
|
||||
and clean up any AI slop before sending it. Do not use AI to automate your
|
||||
responses to maintainers.
|
||||
|
||||
We review contributions on their quality, regardless of how they were produced. A
|
||||
pull request or issue that looks like unreviewed AI output, with low-quality or
|
||||
broken changes, may be closed without a detailed review or triage.
|
||||
|
||||
## Merging Pull Request
|
||||
|
||||
The person who merges the Pull Request is responsible for satisfying the requirements below:
|
||||
|
||||
@@ -61,9 +61,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.145.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.146.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.145.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.146.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images, see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||
@@ -245,7 +245,7 @@ The following steps must be performed during the upgrade / downgrade procedure:
|
||||
* Wait until the process stops. This can take a few seconds.
|
||||
* Start the upgraded VictoriaMetrics.
|
||||
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/whats-new-in-prometheus-2-8-wal-based-remote-write/) for details. The same also applies to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
|
||||
> If you'd prefer not to manage upgrades yourself, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_single_upgrade)
|
||||
> performs version upgrades automatically during maintenance windows with no action required on your part.
|
||||
@@ -417,7 +417,7 @@ VictoriaMetrics is configured via command-line flags, so it must be restarted wh
|
||||
* Wait until the process stops. This can take a few seconds.
|
||||
* Start VictoriaMetrics with the new command-line flags.
|
||||
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/whats-new-in-prometheus-2-8-wal-based-remote-write/) for details. The same also applies to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
|
||||
## How to scrape Prometheus exporters such as [node-exporter](https://github.com/prometheus/node_exporter)
|
||||
|
||||
@@ -478,6 +478,11 @@ and [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyco
|
||||
to the given number of digits after the decimal point.
|
||||
For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
|
||||
VictoriaMetrics accepts `optimize_repeated_binary_op_subexprs=1` query arg for [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
handler. It allows `vmselect` to execute left and right sides of binary operators sequentially when they contain the same
|
||||
optimized aggregate rollup result expression, so the second side may reuse the rollup result cache populated by the first side.
|
||||
The optimization is disabled by default and applies only when rollup result cache can be used for the request.
|
||||
|
||||
VictoriaMetrics accepts `limit` query arg for [/api/v1/labels](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1labels)
|
||||
and [`/api/v1/label/<labelName>/values`](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1labelvalues) handlers for limiting the number of returned entries.
|
||||
For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels.
|
||||
@@ -1712,64 +1717,63 @@ The following versions of VictoriaMetrics receive regular security fixes:
|
||||
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | ✅ |
|
||||
| other releases | ❌ |
|
||||
|
||||
### Software Bill of Materials (SBOM)
|
||||
|
||||
Every VictoriaMetrics container{{% available_from "v1.137.0" %}} image published to
|
||||
[Docker Hub](https://hub.docker.com/u/victoriametrics) and [Quay.io](https://quay.io/organization/victoriametrics) includes an [SPDX](https://spdx.dev/) SBOM attestation generated automatically by BuildKit during `docker buildx build`.
|
||||
|
||||
To inspect the SBOM for an image:
|
||||
|
||||
```sh
|
||||
docker buildx imagetools inspect \
|
||||
docker.io/victoriametrics/victoria-metrics:latest \
|
||||
--format "{{ json .SBOM }}"
|
||||
```
|
||||
|
||||
To scan an image using its SBOM attestation with [Trivy](https://github.com/aquasecurity/trivy):
|
||||
|
||||
```sh
|
||||
trivy image --sbom-sources oci \
|
||||
docker.io/victoriametrics/victoria-metrics:latest
|
||||
```
|
||||
|
||||
### Reporting a Vulnerability
|
||||
|
||||
Please report any security issues to <security@victoriametrics.com>
|
||||
|
||||
### CVE handling policy
|
||||
|
||||
**Source code:** Go dependencies are scanned by [govulncheck](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) in CI.
|
||||
All vulnerabilities must be fixed before the next scheduled release and backported to [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
|
||||
|
||||
**Docker images:** CVE findings in the [Alpine](https://security.alpinelinux.org/) base image pose minimal risk since VictoriaMetrics binaries are statically compiled with no OS dependencies.
|
||||
When detected, only the Alpine base tag is updated.
|
||||
Releases proceed as planned even if upstream fixes are not yet available.
|
||||
For maximum security, hardened [scratch](https://hub.docker.com/_/scratch)-based images are also provided.
|
||||
All images are continuously scanned by Docker Hub and verified before release using [grype](https://github.com/anchore/grype).
|
||||
|
||||
### General security recommendations:
|
||||
|
||||
* All the VictoriaMetrics components must run in protected private networks without direct access from untrusted networks such as Internet.
|
||||
* All VictoriaMetrics components must run in protected private networks without direct access from untrusted networks such as the Internet.
|
||||
The exception is [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) and [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/),
|
||||
which are intended for serving public requests and performing authorization with [TLS termination](https://en.wikipedia.org/wiki/TLS_termination_proxy).
|
||||
* All the requests from untrusted networks to VictoriaMetrics components must go through auth proxy such as [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
|
||||
* All the requests from untrusted networks to VictoriaMetrics components must go through an auth proxy, such as [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
|
||||
or [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/). The proxy must be set up with proper authentication and authorization.
|
||||
* Prefer using lists of allowed API endpoints, while disallowing access to other endpoints when configuring [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
|
||||
in front of VictoriaMetrics components.
|
||||
* Set reasonable [`Strict-Transport-Security`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security) header value to all the components to mitigate [MitM attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack), for example: `max-age=31536000; includeSubDomains`. See `-http.header.hsts` flag.
|
||||
* Set a reasonable [`Strict-Transport-Security`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security) header value on all the components to mitigate [MitM attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack), for example: `max-age=31536000; includeSubDomains`. See `-http.header.hsts` flag.
|
||||
* Set a reasonable [`Content-Security-Policy`](https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP) header value to mitigate [XSS attacks](https://en.wikipedia.org/wiki/Cross-site_scripting). See `-http.header.csp` flag.
|
||||
* Set a reasonable [`X-Frame-Options`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options) header value to mitigate [clickjacking attacks](https://en.wikipedia.org/wiki/Clickjacking), for example `DENY`. See `-http.header.frameOptions` flag.
|
||||
|
||||
VictoriaMetrics provides the following security-related command-line flags:
|
||||
The following security-related command-line flags are available for all components with HTTP API:
|
||||
|
||||
* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS at `-httpListenAddr` (TCP port 8428 is listened by default).
|
||||
* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS at `-httpListenAddr`.
|
||||
[Enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports automatic issuing of TLS certificates.
|
||||
See [these docs](#automatic-issuing-of-tls-certificates).
|
||||
* `-mtls` and `-mtlsCAFile` for enabling [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication) for requests to `-httpListenAddr`. See [these docs](#mtls-protection).
|
||||
* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints
|
||||
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
|
||||
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
|
||||
and `X-Frame-Options` HTTP response headers.
|
||||
|
||||
### Protecting service endpoints
|
||||
|
||||
All VictoriaMetrics components expose internal metrics in Prometheus exposition format at the `/metrics` page for [monitoring](https://docs.victoriametrics.com/victoriametrics/#monitoring).
|
||||
Consider limiting access to the `/metrics` page to trusted networks only.
|
||||
|
||||
The following service endpoints may require protection:
|
||||
|
||||
* `-deleteAuthKey` for protecting the `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
|
||||
* `-snapshotAuthKey` for protecting the `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
|
||||
* `-forceFlushAuthKey` for protecting the `/internal/force_flush` endpoint. See [force flush docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#forced-flush).
|
||||
* `-forceMergeAuthKey` for protecting the `/internal/force_merge` endpoint. See [force merge docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#forced-merge).
|
||||
* `-search.resetCacheAuthKey` for protecting the `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
|
||||
* `-reloadAuthKey` for protecting the `/-/reload` endpoint, which is used for force reloading of [`-promscrape.config`](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* `-reloadAuthKey` for protecting the `/-/reload` endpoint, which is used to force reload the [`-promscrape.config`](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* `-configAuthKey` for protecting the `/config` endpoint, since it may contain sensitive information such as passwords.
|
||||
* `-flagsAuthKey` for protecting the `/flags` endpoint.
|
||||
* `-pprofAuthKey` for protecting the `/debug/pprof/*` endpoints, which can be used for [profiling](#profiling).
|
||||
* `-metricNamesStatsResetAuthKey` for protecting the `/api/v1/admin/status/metric_names_stats/reset` endpoint, used for [Metric Names Tracker](#track-ingested-metrics-usage).
|
||||
* `-denyQueryTracing` for disallowing [query tracing](#query-tracing).
|
||||
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
|
||||
and `X-Frame-Options` HTTP response headers.
|
||||
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
@@ -1777,17 +1781,6 @@ For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<i
|
||||
See also [security recommendation for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#security)
|
||||
and [the general security page at VictoriaMetrics website](https://victoriametrics.com/security/).
|
||||
|
||||
### CVE handling policy
|
||||
|
||||
**Source code:** Go dependencies are scanned by [govulncheck](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) in CI.
|
||||
All vulnerabilities must be fixed before next scheduled release and backported to [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
|
||||
|
||||
**Docker images:** CVE findings in [Alpine](https://security.alpinelinux.org/) base image pose minimal risk since VictoriaMetrics binaries are statically compiled with no OS dependencies.
|
||||
When detected, only the Alpine base tag is updated.
|
||||
Releases proceed as planned even if upstream fixes are not yet available.
|
||||
For maximum security, hardened [scratch](https://hub.docker.com/_/scratch)-based images are also provided.
|
||||
All images are continuously scanned by Docker Hub and verified before release using [grype](https://github.com/anchore/grype).
|
||||
|
||||
### mTLS protection
|
||||
|
||||
By default `VictoriaMetrics` accepts http requests at `8428` port (this port can be changed via `-httpListenAddr` command-line flags).
|
||||
@@ -1817,19 +1810,39 @@ This functionality can be evaluated for free according to [these docs](https://d
|
||||
|
||||
See also [security recommendations](#security).
|
||||
|
||||
### Software Bill of Materials (SBOM)
|
||||
|
||||
Every VictoriaMetrics container{{% available_from "v1.137.0" %}} image published to
|
||||
[Docker Hub](https://hub.docker.com/u/victoriametrics) and [Quay.io](https://quay.io/organization/victoriametrics) includes an [SPDX](https://spdx.dev/) SBOM attestation generated automatically by BuildKit during `docker buildx build`.
|
||||
|
||||
To inspect the SBOM for an image:
|
||||
|
||||
```sh
|
||||
docker buildx imagetools inspect \
|
||||
docker.io/victoriametrics/victoria-metrics:latest \
|
||||
--format "{{ json .SBOM }}"
|
||||
```
|
||||
|
||||
To scan an image using its SBOM attestation with [Trivy](https://github.com/aquasecurity/trivy):
|
||||
|
||||
```sh
|
||||
trivy image --sbom-sources oci \
|
||||
docker.io/victoriametrics/victoria-metrics:latest
|
||||
```
|
||||
|
||||
## Tuning
|
||||
|
||||
* No need in tuning for VictoriaMetrics - it uses reasonable defaults for command-line flags,
|
||||
* No need to tune VictoriaMetrics - it uses reasonable defaults for command-line flags,
|
||||
which are automatically adjusted for the available CPU and RAM resources.
|
||||
* No need in tuning for Operating System - VictoriaMetrics is optimized for default OS settings.
|
||||
* No need to tune the operating system - VictoriaMetrics is optimized for default OS settings.
|
||||
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).
|
||||
The recommendation is not specific for VictoriaMetrics only but also for any service which handles many HTTP connections and stores data on disk.
|
||||
* VictoriaMetrics is a write-heavy application and its performance depends on disk performance. So be careful with other
|
||||
The recommendation is not specific to VictoriaMetrics; it applies to any service that handles many HTTP connections and stores data on disk.
|
||||
* VictoriaMetrics is a write-heavy application, and its performance depends on disk performance. So be careful with other
|
||||
applications or utilities (like [fstrim](https://manpages.ubuntu.com/manpages/lunar/en/man8/fstrim.8.html))
|
||||
which could [exhaust disk resources](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1521).
|
||||
* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
|
||||
since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
|
||||
If you plan to store more than 1TB of data on `ext4` partition, then the following options are recommended to pass to `mkfs.ext4`:
|
||||
since it is protected from hardware failures via internal replication, and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
|
||||
If you plan to store more than 1TB of data on an `ext4` partition, then the following options are recommended to pass to `mkfs.ext4`:
|
||||
|
||||
```sh
|
||||
mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
|
||||
|
||||
@@ -26,18 +26,46 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
* FEATURE: all VictoriaMetrics components: add `-http.header.disableServerHostname` command-line flag for disabling the `X-Server-Hostname` HTTP response header. See [#11067](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11067). Thanks to @zasdaym for contribution.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): log calls to [/api/v1/admin/tsdb/delete_series](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1admintsdbdelete_series) API handler. This should help to identify events of metrics deletion from the database. See [#11104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11104).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add `-vm-headers` and `-vm-bearer-token` flags for authenticating requests to the VictoriaMetrics import destination. The flags are available in `opentsdb`, `influx`, `remote-read`, `prometheus`, `mimir`, and `thanos` vmctl sub-commands. See [#8897](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8897).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): add the `last` value to graph legend statistics. See [#10759](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10759).
|
||||
* SECURITY: upgrade base docker image (Alpine) from 3.23.4 to 3.24.1. See [Alpine 3.24.1 release notes](https://www.alpinelinux.org/posts/Alpine-3.24.1-released.html).
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: cancel in-flight HTTP requests shortly before `-http.maxGracefulShutdownDuration` elapses during graceful shutdown, so they can drain and the shutdown completes cleanly within that window instead of timing out and exiting via `logger.Fatalf` -> `os.Exit`. This prevents skipping the storage flush and losing in-memory data when long-lived requests are in flight (such as VictoriaLogs live tailing). See [#1502](https://github.com/VictoriaMetrics/VictoriaLogs/issues/1502).
|
||||
* BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix unexpected rare rerouting. See [#11162](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11162).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): propagate cache reset operation to `selectNode` when `/internal/resetRollupResultCache` is called. Previously, the propagation only happened when the `delete_series` API was called. See [#11112](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11112).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix possible unexpected increases in `rate_avg` and `rate_sum` if an out-of-order sample is ingested after the previous flush. See [#11140](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11140).
|
||||
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `default_vm_access_claim` field into `jwt` section of auth config. It can be used in [JWT claim placeholders](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating) if the `JWT` token doesn't have the `vm_access` claim. See [#11054](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11054).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): reduce CPU usage by 10% at [sharding among remote storages](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages). See [#11113](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11113). Thanks to @bennf for contribution.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `optimize_repeated_binary_op_subexprs=1` query arg to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) for executing binary operator sides sequentially when they share the same optimized aggregate rollup result expression. This allows the second side to reuse rollup result cache populated by the first side. See [#10575](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10575).
|
||||
|
||||
## [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.146.0)
|
||||
|
||||
Released at 2026-06-22
|
||||
|
||||
* FEATURE: all VictoriaMetrics components: add `-http.header.disableServerHostname` command-line flag for disabling the `X-Server-Hostname` HTTP response header. See [#11067](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11067). Thanks to @zasdaym for contribution.
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): expose `vm_streamaggr_dedup_dropped_samples_total` to allow tracking dropped old samples during [deduplication](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): use the aggregation rule interval as the default [staleness_interval](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness) instead of `2*interval`, to reduce spikes when there are gaps between received samples. See [#11102](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11102).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): add new aggregation output [sum_samples_total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#sum_samples_total) for summing input delta values into a cumulative counter. See issues [#11002](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11002) and [#4843](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4843).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add a new flag `-remoteWrite.inmemoryQueues` to prioritize recently ingested data over historical data stored at file-based [persistent queue](https://docs.victoriametrics.com/victoriametrics/vmagent/#on-disk-persistence-and-data-processing-order). See [#8833](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8833)
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add `-promscrape.cluster.shardByLabels` command-line flag for selecting target labels used for sharding scrape targets among `vmagent` instances in cluster mode. See [#11044](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11044).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add `-vm-headers` and `-vm-bearer-token` flags for authenticating requests to the VictoriaMetrics import destination. The flags are available in `opentsdb`, `influx`, `remote-read`, `prometheus`, `mimir`, and `thanos` vmctl sub-commands. See [#8897](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8897).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): log calls to [/api/v1/admin/tsdb/delete_series](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1admintsdbdelete_series) API handler. This should help to identify events of metrics deletion from the database. See [#11104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11104).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): add the `last` value to graph legend statistics. See [#10759](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10759).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add support for [Monitoring Data eXchange (MDX)](https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange): the ability to route only metrics from VictoriaMetrics services to a specific `-remoteWrite.url`. MDX is useful for building monitoring-of-monitoring where one remote storage should receive the full metric stream and another should receive only VictoriaMetrics metrics. Enable per destination with `-remoteWrite.mdx.enable=true`. See [#10600](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10600).
|
||||
|
||||
* BUGFIX: [enterprise](https://docs.victoriametrics.com/enterprise/) [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly expose metric `vm_retention_filters_partitions_scheduled_rows`. See [#11138](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11138)
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix potential corruption of remote-write metadata `Unit` values. See [#11120](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11120). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): push metrics to configured `-pushmetrics.url` on shutdown when migration fails. Previously, metrics were not pushed if vmctl exited with an error. See [#11081](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11081). Thanks to @zasdaym for contribution.
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): correctly apply long tenant filters. Previously, such filters could be truncated, causing tenants to be matched incorrectly. See [#11096](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11096). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): don't cache empty responses for tenant IDs discovery during [multitenant queries](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenant-reads). This problem was visible during integration tests when multitenant queries were executed before the first ingestion happened. See [#10982](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10982)
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `metricFamilyName` at metrics metadata response. See [#11129](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11129). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent more cases of panic during directory deletion on `NFS`-based mounts. See [#11060](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11060).
|
||||
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
Released at 2026-06-08
|
||||
@@ -276,6 +304,25 @@ It enables back `Discovered targets` debug UI by default.
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `extra_filters[]` filter when querying `vm_account_id` or `vm_project_id` labels via [multitenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) request for `/api/v1/label/…/values` API. Before, `extra_filters` was ignored. See [#10503](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10503).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): revert the use of rollup result cache for [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query) that contain [`rate`](https://docs.victoriametrics.com/MetricsQL.html#rate) function with a lookbehind window larger than `-search.minWindowForInstantRollupOptimization`. The cache usage was removed since [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0). See [#10098](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084) for more details.
|
||||
|
||||
## [v1.136.12](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.12)
|
||||
|
||||
Released at 2026-06-19
|
||||
|
||||
**v1.136.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.136.x line will be supported for at least 12 months since [v1.136.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11360) release**
|
||||
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix potential corruption of remote-write metadata `Unit` values. See [#11120](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11120). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): push metrics to configured `-pushmetrics.url` on shutdown when migration fails. Previously, metrics were not pushed if vmctl exited with an error. See [#11081](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11081). Thanks to @zasdaym for contribution.
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `metricFamilyName` at metrics metadata response. See [#11129](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11129). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): correctly apply long tenant filters. Previously, such filters could be truncated, causing tenants to be matched incorrectly. See [#11096](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11096). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent more cases of panic during directory deletion on `NFS`-based mounts. See [#11060](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11060).
|
||||
|
||||
## [v1.136.11](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.11)
|
||||
|
||||
Released at 2026-06-05
|
||||
@@ -637,6 +684,20 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/changelog_2025/#v11230)
|
||||
|
||||
## [v1.122.25](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.25)
|
||||
|
||||
Released at 2026-06-19
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
|
||||
## [v1.122.24](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.24)
|
||||
|
||||
Released at 2026-06-05
|
||||
|
||||
@@ -121,7 +121,7 @@ It is allowed to run Enterprise components in [cases listed here](https://docs.v
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest),
|
||||
[the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest)
|
||||
and [the releases page for VictoriaTraces](https://github.com/VictoriaMetrics/VictoriaTraces/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -139,8 +139,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -155,7 +155,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run Enterprise components in [cases listed here](https://docs.victoriametrics.com/victoriametrics/enterprise/#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.145.0-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.146.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](https://docs.victoriametrics.com/victoriametrics/enterprise/#binary-releases) section.
|
||||
@@ -165,13 +165,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.145.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.146.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.145.0-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.146.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -181,7 +181,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -213,7 +213,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -224,7 +224,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -274,7 +274,7 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via an existing secret, the following custom resource is used:
|
||||
@@ -291,7 +291,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -342,7 +342,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -351,7 +351,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.145.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.146.0-enterprise-fips` – uses the FIPS-compatible binary and is based on the `scratch` image.
|
||||
|
||||
## What Happens to Licensed Components When a License Expires
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ aliases:
|
||||
1. The main goal - **to help users and [clients](https://docs.victoriametrics.com/victoriametrics/enterprise/) using VictoriaMetrics products in the most efficient way**.
|
||||
1. Fixing bugs in the essential functionality of VictoriaMetrics components. Small usability bugs are usually the most annoying,
|
||||
so they **must be fixed first**. Bugs, which affect a small number of users at some rare edge cases, can be fixed later.
|
||||
1. Improving [public docs for VictoriaMetrics products](https://docs.victoriametrics.com).
|
||||
1. Improving [public docs for VictoriaMetrics products](https://docs.victoriametrics.com),
|
||||
so users could find answers to their questions via Google or any other AI-powered web search without the need
|
||||
to ask these questions at our [support channels](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#community-and-contributions).
|
||||
1. Simplifying usage of VictoriaMetrics products without breaking backwards compatibility, so users could regularly
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```yaml
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -76,7 +76,7 @@ It is better to substitute the slow recording rule with the following [stream ag
|
||||
outputs: [rate_sum]
|
||||
```
|
||||
|
||||
> Field `interval` should be set to a value at least several times higher than the matched metrics collection interval.
|
||||
> It is recommended to set the `interval` field to a value at least 2 times the matched metrics collection interval.
|
||||
|
||||
This stream aggregation generates `http_request_duration_seconds_bucket:1m_without_instance_rate_sum` output series according to [output metric naming](#output-metric-names).
|
||||
Then these series can be used in [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules):
|
||||
@@ -396,7 +396,7 @@ before sending them to the configured `-remoteWrite.url`. The deduplication can
|
||||
|
||||
Labels can be dropped before deduplication is applied. See [these docs](#dropping-unneeded-labels).
|
||||
|
||||
Stream aggregation deduplication is applied before aggregation rules, so duplicate samples are dropped before aggregation.
|
||||
Stream aggregation deduplication is applied before aggregation rules, so duplicate samples are dropped before aggregation. The dropped old samples can be tracked with the `vm_streamaggr_dedup_dropped_samples_total` metric.
|
||||
|
||||
# Relabeling
|
||||
|
||||
@@ -444,7 +444,9 @@ outside the current [aggregation interval](https://docs.victoriametrics.com/vict
|
||||
|
||||
- To enable [aggregation windows](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows).
|
||||
|
||||
The dropped old samples can be tracked with the `vm_streamaggr_ignored_samples_total{reason="too_old"}` metric.
|
||||
- To enable [deduplication](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication).
|
||||
|
||||
The dropped old samples can be tracked with the `vm_streamaggr_ignored_samples_total{reason="too_old"}` and `vm_streamaggr_dedup_dropped_samples_total` metrics.
|
||||
|
||||
## Ignore aggregation intervals on start
|
||||
|
||||
@@ -622,13 +624,12 @@ command line flags. See how to [shard data across remote write destinations](htt
|
||||
The following requirements must be met for sharded aggregation to work correctly:
|
||||
- All sharding vmagents should have the same deterministic sharding configuration.
|
||||
- The sharding configuration must align with the `by` and `without` lists:
|
||||
- Labels listed in `by` setting should be a subset of shard's routing key `-remoteWrite.shardByURL.labels`.
|
||||
With `-remoteWrite.shardByURL.labels=env,job` aggregator's `by` should include `by: env`, `by: job` or both: `by: [env, job]`.
|
||||
This makes sure that all the samples for the same `env` and `job` are aggregated together and produce the complete output.
|
||||
- Labels listed in `without` setting should be a superset of shard's routing key `-remoteWrite.shardByURL.ignoreLabels`.
|
||||
With `-remoteWrite.shardByURL.ignoreLabels=env,job` aggregator's `without` should include at least both labels `without: [env,job]`.
|
||||
This makes sure that `requests_total{env=test, job=foo}` and `requests_total{env=prod, job=foo}` are routed to the same aggregator
|
||||
and are aggregated together. See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938#issuecomment-2018470324).
|
||||
- Labels configured in `-remoteWrite.shardByURL.labels` must be a subset of the labels listed in `by`.
|
||||
For example, if the aggregation config specifies `by: [env, job]`, then `-remoteWrite.shardByURL.labels` may include `env`, `job`, or both.
|
||||
This ensures that all samples contributing to the same aggregation result are routed to the same aggregator instance and aggregated together to produce a complete output.
|
||||
- Labels configured in `-remoteWrite.shardByURL.ignoreLabels` must be a superset of the labels listed in `without`.
|
||||
For example, if the aggregation config specifies `without: [env, pod]`, then `-remoteWrite.shardByURL.ignoreLabels` must include at least `env` and `pod`.
|
||||
This ensures that labels removed during aggregation are not used for shard routing.
|
||||
- Aggregating vmagents should not produce collisions: the aggregation output should be unique across all the sharded agents.
|
||||
For example, `requests_total:5m_without_env_pod_total` produced by both `vmagent-aggr-1` and `vmagent-aggr-2` will collide
|
||||
unless they have labels uniquely identifying them. These labels should be either preserved during sharding and aggregation config,
|
||||
@@ -642,9 +643,9 @@ See also [why you shouldn't put an aggregator behind a load balancer](https://do
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#staleness).
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#data-delay-and-staleness).
|
||||
- [Excessively large values for `total*`, `increase*`, and `rate*` outputs](#counter-resets).
|
||||
- [Lower than expected values for `total_prometheus` and `increase_prometheus` outputs](#staleness).
|
||||
- [Lower than expected values for `total_prometheus` and `increase_prometheus` outputs](#data-delay-and-staleness).
|
||||
- [High memory usage and CPU usage](#high-resource-usage).
|
||||
- [Unexpected results in vmagent cluster mode](#cluster-mode).
|
||||
- [Inaccurate aggregation results for histograms](#aggregation-windows)
|
||||
@@ -677,11 +678,19 @@ the following settings:
|
||||
|
||||
If counter-specific outputs, such as `total*`, `rate*`, and `increase*`, produce values that are significantly higher than anticipated, then check the `vm_streamaggr_counter_resets_total` metric. This metric increments each time when [counter reset event](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) happens and could be caused by duplication or collision of raw samples. If you observe duplication or collision, try solving this problem by either fixing the source of these metrics or by [deduplicating](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication) these samples before aggregation.
|
||||
|
||||
## Staleness
|
||||
## Data delay and staleness {#staleness}
|
||||
|
||||
The following outputs track the last seen per-series values in order to properly calculate output values:
|
||||
Stream aggregation processes input samples in a streaming manner and flushes results once per specified `interval`. Because of this, aggregation results can be heavily affected by data delays (see `vm_streamaggr_samples_lag_seconds_bucket` metric).
|
||||
|
||||
In particular:
|
||||
1. Stream aggregation won't produce results if input samples are delayed for multiple aggregation intervals, causing gaps in the output.
|
||||
2. Delayed and out-of-order samples can inflate or skew aggregation results.
|
||||
|
||||
Dropping delayed samples can result in missed observations in the results, while keeping delayed samples may inflate the results. It is up to the user to decide what they prefer in the produced results:
|
||||
1. If you prefer consistency in aggregation results and do not want delayed data to affect the next aggregation window, drop all potentially delayed samples via [ignore_old_samples](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples).
|
||||
2. If you prefer to have the accumulated changes from delayed data reflected in aggregation windows after the delay, increase `staleness_interval` in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
This is especially important for outputs that track the last seen per-series values in order to properly calculate output values:
|
||||
|
||||
- [histogram_bucket](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#histogram_bucket)
|
||||
- [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase)
|
||||
- [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus)
|
||||
- [rate_avg](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#rate_avg)
|
||||
@@ -689,21 +698,19 @@ The following outputs track the last seen per-series values in order to properly
|
||||
- [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total)
|
||||
- [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus)
|
||||
|
||||
The last seen per-series value is dropped if no new samples are received for the given time series during two consecutive aggregations
|
||||
intervals specified in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) via `interval` option.
|
||||
For these outputs, the last seen per-series value is dropped if no new samples are received for the given time series during consecutive aggregation intervals specified in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) via `interval` option.
|
||||
If a new sample for the existing time series is received after that, then it is treated as the first sample for a new time series.
|
||||
This may lead to the following issues:
|
||||
This may lead to the following issues when data is delayed:
|
||||
|
||||
- Lower than expected results for [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus) and [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus) outputs,
|
||||
since they ignore the first sample in a new time series.
|
||||
- Unexpected spikes for [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total) and [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase) outputs, since they assume that new time series start from 0.
|
||||
- [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total) and [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase) may produce unexpected spikes, since they assume that a new time series starts from `0`.
|
||||
- [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus) and [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus) may produce lower than expected results, if you expect to see the accumulated changes reflected after the delay, since they ignore the first sample in a new time series.
|
||||
|
||||
These issues can be fixed in the following ways:
|
||||
These issues can be mitigated in the following ways:
|
||||
|
||||
- By increasing the `interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines.
|
||||
- By specifying the `staleness_interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` is equal to `2 x interval`.
|
||||
delays in data ingestion pipelines. It is recommended to set `interval` to at least 2× the scrape or push interval of the input. Set it to a higher value if the input pipeline is prone to large delays.
|
||||
- By increasing the `staleness_interval` option in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` is equal to `interval`.
|
||||
|
||||
## High resource usage
|
||||
|
||||
|
||||
@@ -66,6 +66,8 @@ specified individually per each `-remoteWrite.url`:
|
||||
|
||||
# interval is the interval for the aggregation.
|
||||
# The aggregated stats are sent to remote storage once per interval.
|
||||
# It is recommended to set `interval` to at least 2× the scrape or push interval of the input.
|
||||
# Set it to a higher value if the input pipeline is prone to large delays.
|
||||
#
|
||||
interval: 1m
|
||||
|
||||
@@ -94,7 +96,7 @@ specified individually per each `-remoteWrite.url`:
|
||||
# - total_prometheus
|
||||
# See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness for more details.
|
||||
#
|
||||
# staleness_interval: 2m
|
||||
# staleness_interval: 1m
|
||||
|
||||
# ignore_first_sample_interval specifies the interval after which the agent begins sending samples.
|
||||
# By default, it is set to the staleness interval, and it helps reduce the initial sample load after an agent restart.
|
||||
@@ -220,6 +222,7 @@ Below are aggregation functions that can be put in the `outputs` list at [stream
|
||||
* [stddev](#stddev)
|
||||
* [stdvar](#stdvar)
|
||||
* [sum_samples](#sum_samples)
|
||||
* [sum_samples_total](#sum_samples_total)
|
||||
* [total](#total)
|
||||
* [total_prometheus](#total_prometheus)
|
||||
* [unique_samples](#unique_samples)
|
||||
@@ -291,9 +294,6 @@ The results of `histogram_bucket` is equal to the following [MetricsQL](https://
|
||||
sum(histogram_over_time(some_histogram_bucket[interval])) by (vmrange)
|
||||
```
|
||||
|
||||
Aggregating irregular and sporadic metrics (received from [Lambdas](https://aws.amazon.com/lambda/)
|
||||
or [Cloud Functions](https://cloud.google.com/functions)) can be controlled via [staleness_interval](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness) option.
|
||||
|
||||
See also:
|
||||
- [quantiles](#quantiles)
|
||||
- [avg](#avg)
|
||||
@@ -506,6 +506,20 @@ See also:
|
||||
|
||||
- [count_samples](#count_samples)
|
||||
- [count_series](#count_series)
|
||||
- [sum_samples_total](#sum_samples_total)
|
||||
|
||||
### `sum_samples_total`
|
||||
|
||||
`sum_samples_total` {{% available_from "v1.146.0" %}} sums input delta values into a cumulative [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/index.html#counter) and outputs the result at the given `interval`.
|
||||
`sum_samples_total` makes sense only for aggregating delta values from clients such as [StatsD counter](https://github.com/statsd/statsd/blob/master/docs/metric_types.md#counting).
|
||||
|
||||
The result of `sum_samples_total` is roughly equal to the following [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query:
|
||||
|
||||
```metricsql
|
||||
sum(running_sum(some_delta_values))
|
||||
```
|
||||
|
||||
> Note: The aggregator will forget the cumulative counter if it has not seen input samples for `staleness_interval` (set to `interval` by default) per output result, so the output counter will start from `0` the next time it sees the input again. Increase the `staleness_interval` option if you want to extend the window to tolerate bigger gaps.
|
||||
|
||||
### total
|
||||
|
||||
@@ -547,6 +561,7 @@ See also:
|
||||
- [total_prometheus](#total_prometheus)
|
||||
- [increase](#increase)
|
||||
- [increase_prometheus](#increase_prometheus)
|
||||
- [sum_samples_total](#sum_samples_total)
|
||||
- [rate_sum](#rate_sum)
|
||||
- [rate_avg](#rate_avg)
|
||||
|
||||
@@ -576,6 +591,7 @@ See also:
|
||||
- [total](#total)
|
||||
- [increase](#increase)
|
||||
- [increase_prometheus](#increase_prometheus)
|
||||
- [sum_samples_total](#sum_samples_total)
|
||||
- [rate_sum](#rate_sum)
|
||||
- [rate_avg](#rate_avg)
|
||||
|
||||
|
||||
@@ -622,11 +622,13 @@ curl -Is http://localhost:8428/internal/resetRollupResultCache
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache
|
||||
curl -Is http://<vmselect>:8481/internal/resetRollupResultCache?propagate=1
|
||||
```
|
||||
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag. If this
|
||||
flag isn't set, then cache need to be purged from each vmselect individually.
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag when `propagate=1` argument is set.
|
||||
If this flag or the `propagate` argument isn't set, then the cache needs to be purged from each vmselect individually.
|
||||
|
||||
If `-search.resetCacheAuthKey` is set, it will be attached to the propagation request as a query argument.
|
||||
|
||||
### TCP and UDP
|
||||
|
||||
|
||||
@@ -275,6 +275,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info (default 1)
|
||||
-promscrape.cluster.shardByLabels array
|
||||
Optional list of target labels, which will be used for sharding targets among cluster members if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
@@ -486,13 +490,13 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
-search.maxTSDBStatusTopNSeries int
|
||||
The maximum value of 'topN' argument that can be passed to /api/v1/status/tsdb API. This option allows limiting memory usage. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#tsdb-stats (default 1000)
|
||||
-search.maxTagKeys int
|
||||
The maximum number of tag keys returned from /api/v1/labels . See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
The maximum number of tag keys returned per search. See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
-search.maxTagValueSuffixesPerSearch int
|
||||
The maximum number of tag value suffixes returned from /metrics/find (default 100000)
|
||||
-search.maxTagValues int
|
||||
The maximum number of tag values returned from /api/v1/label/<label_name>/values . See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
The maximum number of tag values returned per search. See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
-search.maxUniqueTimeseries int
|
||||
The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional).
|
||||
The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect
|
||||
-search.maxWorkersPerQuery int
|
||||
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default netstorage.defaultMaxWorkersPerQuery())
|
||||
-search.minStalenessInterval duration
|
||||
|
||||
@@ -268,6 +268,39 @@ for the collected samples. Examples:
|
||||
```sh
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
### Monitoring Data eXchange
|
||||
|
||||
The Monitoring Data eXchange (MDX){{% available_from "#" %}} feature allows `vmagent` to forward only VictoriaMetrics metrics to selected `-remoteWrite.url` destinations while dropping metrics from non-VictoriaMetrics services.
|
||||
|
||||
To enable MDX, set `-remoteWrite.mdx.enable=true` for the target URL and `-remoteWrite.mdx.enable=false` for other URLs:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-all-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=false \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true
|
||||
```
|
||||
When MDX is enabled for a `-remoteWrite.url`, `vmagent` forwards only metrics that:
|
||||
- come from the target that exposes the `vm_app_version` metric (emitted by all VictoriaMetrics components)
|
||||
- contain the `victoriametrics_app=true` label, which will be added automatically to the metrics if the instance was deployed via [VictoriaMetrics Operator](https://docs.victoriametrics.com/operator/).
|
||||
|
||||
The `victoriametrics_app=true` label is added to all metrics preserved by MDX if it is absent.
|
||||
|
||||
- contain the label specified via `-mdx.label`.
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true \
|
||||
-mdx.label="service=victoriametrics"
|
||||
```
|
||||
In this configuration, metrics with the label `service=victoriametrics` are preserved even if their scrape targets do not expose `vm_app_version` metric.
|
||||
|
||||
The number of VictoriaMetrics metrics preserved by MDX is exposed as `vmagent_remotewrite_mdx_rows_preserved_total`.
|
||||
|
||||
The scope of MDX is at the per-url level, so it works after global level mechanisms, such as stream aggregation, relabeling, complexity limiter, and cardinality limiter. See [Life of a sample](https://docs.victoriametrics.com/victoriametrics/vmagent/#life-of-a-sample).
|
||||
|
||||
### Life of a sample
|
||||
|
||||
@@ -285,18 +318,20 @@ flowchart TB
|
||||
F --> G[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability">replicate</a> to each <b>-remoteWrite.url</b><br/>or <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages">shard</a> if <b>-remoteWrite.shardByURL</b> is set]
|
||||
|
||||
%% Left branch
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H2 --> H3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H3 --> H4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
|
||||
|
||||
%% Right branch
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R2 --> R3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R3 --> R4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
|
||||
```
|
||||
|
||||
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
|
||||
@@ -309,11 +344,11 @@ Scraping has additional settings that can be applied before samples are pushed t
|
||||
`vmagent` supports [the same set of push-based data ingestion protocols as VictoriaMetrics does](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
in addition to the pull-based Prometheus-compatible targets' scraping:
|
||||
|
||||
* DataDog "submit metrics" API. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/).
|
||||
* Datadog "submit metrics" API. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/).
|
||||
* InfluxDB line protocol via `http://<vmagent>:8429/write`. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/).
|
||||
* Graphite plaintext protocol if the `-graphiteListenAddr` command-line flag is set. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting).
|
||||
* OpenTelemetry HTTP API. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/).
|
||||
* NewRelic API. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/newrelic/#sending-data-from-agent).
|
||||
* OpenTelemetry HTTP API via `http://<vmagent>:8429/opentelemetry/v1/metrics`. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/).
|
||||
* New Relic API. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/newrelic/#sending-data-from-agent).
|
||||
* OpenTSDB telnet and http protocols if `-opentsdbListenAddr` command-line flag is set. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/).
|
||||
* Zabbix Connector streaming protocol. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/zabbixconnector/#send-data-from-zabbix-connector).
|
||||
* Prometheus remote write protocol via `http://<vmagent>:8429/api/v1/write`.
|
||||
@@ -481,29 +516,38 @@ by specifying `-remoteWrite.forcePromProto` command-line flag for the correspond
|
||||
## Multitenancy
|
||||
|
||||
By default, `vmagent` collects the data without [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) identifiers
|
||||
and routes it to the remote storage specified via `-remoteWrite.url` command-line flag. The `-remoteWrite.url` can point to `/insert/<tenant_id>/prometheus/api/v1/write` path
|
||||
at `vminsert` according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format).
|
||||
and routes it to the remote storage specified via `-remoteWrite.url` command-line flag. Point `-remoteWrite.url` to vminsert's `/insert/<tenant_id>/prometheus/api/v1/write` path
|
||||
according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format).
|
||||
|
||||
> Note: the single-node version of VictoriaMetrics doesn't support multitenancy.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["requests_total{instance=foo}"] --> |/api/v1/write| V[vmagent]
|
||||
B["requests_total{instance=bar}"] <--> |scrape| V
|
||||
V --> |"/insert/#60;tenant_id#62;/#60;suffix#62;"| C[vminsert]
|
||||
A["requests_total{instance=foo}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-push-data-to-vmagent">push</a>| V[vmagent]
|
||||
B["requests_total{instance=bar}"] <--> |<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-collect-metrics-in-prometheus-format">pull</a>| V
|
||||
V --> |"/insert/#60;tenant_id#62;/prometheus/api/v1/write"| C[vminsert]
|
||||
```
|
||||
|
||||
In this case, all the metrics written to `/insert/tenant_id/prometheus/api/v1/write` will belong to the specified `<tenant_id>` tenant.
|
||||
In this case, all the metrics written to `/insert/<tenant_id>/prometheus/api/v1/write` will belong to the specified `<tenant_id>` tenant.
|
||||
|
||||
### Multitenancy via labels
|
||||
|
||||
vmagent can write data to multiple distinct tenants if `-remoteWrite.url` points to [multitenant URL at VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels)
|
||||
and tenant is specified via [multitenancy labels](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels):
|
||||
vmagent can write data to **multiple distinct tenants** if `-remoteWrite.url` points to the [multitenant URL in the VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels)
|
||||
and the tenant is specified via [multitenancy labels](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels):
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["requests_total{instance=foo, vm_account_id=0}"] --> |/api/v1/write| V[vmagent]
|
||||
B["requests_total{instance=bar, vm_account_id=1}"] <--> |scrape| V
|
||||
V --> |"/insert/multitenant/#60;suffix#62;"| C[vminsert]
|
||||
A["requests_total{instance=foo, vm_account_id=0}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-push-data-to-vmagent">push</a>| V[vmagent]
|
||||
B["requests_total{instance=bar, vm_account_id=1}"] <--> |<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-collect-metrics-in-prometheus-format">pull</a>| V
|
||||
V --> |"/insert/multitenant/prometheus/api/v1/write"| C[vminsert]
|
||||
```
|
||||
`<tenant_id>` is extracted from the `vm_account_id` and `vm_project_id` labels.
|
||||
|
||||
> A single payload pulled from or pushed to vmagent may contain time series belonging to multiple tenants.
|
||||
|
||||
When vminsert receives the data on the `/insert/multitenant` path, it extracts `<tenant_id>` from the `vm_account_id` and `vm_project_id` labels for
|
||||
each distinct time series.
|
||||
|
||||
> If `vm_account_id` or `vm_project_id` labels are missing or invalid, then the corresponding accountID and projectID are set to 0.
|
||||
|
||||
The `vm_account_id` and `vm_project_id` labels can be specified via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) before sending the metrics to `-remoteWrite.url`.
|
||||
For example, the following relabeling rule instructs sending metrics to `<account_id>:0` tenant defined in the `prometheus.io/account_id` annotation of Kubernetes pod deployment:
|
||||
@@ -516,11 +560,12 @@ scrape_configs:
|
||||
target_label: vm_account_id
|
||||
```
|
||||
|
||||
vmagent can get tenant identifier from `__tenant_id__` label at target discovery phase.
|
||||
It implicitly converts `__tenant_id__` label into `vm_account_id` and `vm_project_id` labels and attaches
|
||||
it to the scraped metrics and metrics metadata.
|
||||
vmagent can get the tenant identifier from the `__tenant_id__` label during the target discovery phase.
|
||||
It implicitly converts the `__tenant_id__` label into `vm_account_id` and `vm_project_id` labels and attaches
|
||||
them to the scraped metrics and metrics metadata.
|
||||
|
||||
For example, the following relabeling rule instructs sending metrics to the `10:5` tenant defined in the `prometheus.io/tenant_id: 10:5` annotation of the Kubernetes pod deployment:
|
||||
For example, the following relabeling rule instructs sending metrics to the `10:5` tenant defined in the `prometheus.io/tenant_id: 10:5`
|
||||
annotation of the Kubernetes pod deployment:
|
||||
|
||||
```yaml
|
||||
scrape_configs:
|
||||
@@ -531,47 +576,54 @@ scrape_configs:
|
||||
target_label: __tenant_id__
|
||||
```
|
||||
|
||||
vmagent can [enforce adding labels](https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics) to all scraped
|
||||
or forwarded metrics.
|
||||
vmagent can also [enforce adding labels](https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics)
|
||||
on all scraped or forwarded metrics.
|
||||
|
||||
### Multitenancy via path
|
||||
|
||||
vmagent can write data to multiple distinct tenants if `-remoteWrite.url` points to [multitenant URL at VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels),
|
||||
tenant is specified in the [write path](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format), and `-enableMultitenantHandlers` command-line flag is set:
|
||||
vmagent can write data to multiple distinct tenants if:
|
||||
* its `-remoteWrite.url` points to the [VictoriaMetrics cluster multitenant URL](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels)
|
||||
* its `-enableMultitenantHandlers` command-line flag is set
|
||||
* clients ingest data into vmagent with the tenant specified in the [write path](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["requests_total{instance=foo}"] --> |/insert/0/#60;suffix#62;| V[vmagent]
|
||||
B["requests_total{instance=bar}"] --> |/insert/1/#60;suffix#62;| V
|
||||
V --> |"/insert/multitenant/#60;suffix#62;"| C[vminsert]
|
||||
A["requests_total{instance=foo}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format">/insert/0/#60;suffix#62;</a>| V[vmagent]
|
||||
B["requests_total{instance=bar}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format">/insert/1/#60;suffix#62;</a>| V
|
||||
V --> |"/insert/multitenant/prometheus/api/v1/write"| C[vminsert]
|
||||
```
|
||||
|
||||
In this configuration, vmagent accepts writes via the same multitenant endpoints (`/insert/<accountID>/<suffix>`) [as vminsert does](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format).
|
||||
For all received data, vmagent will automatically convert tenant identifiers from the URL to `vm_account_id` and `vm_project_id` labels and set tenant info in metadata.
|
||||
For all the received data, vmagent will automatically convert tenant identifiers in the URL path to `vm_account_id` and `vm_project_id` labels, and set tenant information in metadata.
|
||||
|
||||
These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) specified via `-remoteWrite.relabelConfig`
|
||||
and `-remoteWrite.urlRelabelConfig` command-line flags.
|
||||
These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/)
|
||||
specified via `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig` command-line flags.
|
||||
|
||||
### Multitenancy via headers
|
||||
|
||||
vmagent can write data to multiple distinct tenants if `-remoteWrite.url` points to [multitenant URL at VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels),
|
||||
tenant is specified [via headers](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-headers) {{% available_from "v1.143.0" %}}, both `-enableMultitenantHandlers` and `-enableMultitenancyViaHeaders` command-line flags are set:
|
||||
vmagent can write data to multiple distinct tenants if:
|
||||
* its `-remoteWrite.url` points to the [VictoriaMetrics cluster multitenant URL](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels)
|
||||
* its `-enableMultitenantHandlers` and `-enableMultitenancyViaHeaders` command-line flags are both set
|
||||
* clients ingest data into vmagent with the tenants specified [via headers](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-headers) {{% available_from "v1.143.0" %}}
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["requests_total{instance=foo}"] --> |/insert/#60;suffix#62; <br>--header AccountID: 0| V[vmagent]
|
||||
B["requests_total{instance=bar}"] --> |/insert/#60;suffix#62; <br>--header AccountID: 1| V
|
||||
V --> |"/insert/multitenant/#60;suffix#62;"| C[vminsert]
|
||||
A["requests_total{instance=foo}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-headers">/insert/#60;suffix#62;</a> <br>--header AccountID: 0| V[vmagent]
|
||||
B["requests_total{instance=bar}"] --> |<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-headers">/insert/#60;suffix#62;</a> <br>--header AccountID: 1| V
|
||||
V --> |"/insert/multitenant/prometheus/api/v1/write"| C[vminsert]
|
||||
```
|
||||
|
||||
In this configuration, vmagent accepts writes via the same simplified multitenant endpoints (`/insert/<suffix>`) [as vminsert does](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format).
|
||||
The tenant information is extracted from the `AccountID` and `ProjectID` HTTP headers, which are expected to be included in all incoming requests. If headers are missing, then the tenant is set to `0:0` as the default.
|
||||
The tenant information is extracted from the `AccountID` and `ProjectID` HTTP headers, which are expected to be included in all incoming requests.
|
||||
|
||||
For all received data, vmagent will automatically convert tenant identifiers from the headers to `vm_account_id` and `vm_project_id` labels and set tenant info in metadata.
|
||||
These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) specified via `-remoteWrite.relabelConfig`
|
||||
> If headers are missing, then the tenant is set to `0:0` by default.
|
||||
|
||||
For all the received data, vmagent will automatically convert tenant identifiers in the headers to `vm_account_id` and `vm_project_id` labels, and set tenant info in metadata.
|
||||
These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) specified via the `-remoteWrite.relabelConfig`
|
||||
and `-remoteWrite.urlRelabelConfig` command-line flags.
|
||||
|
||||
vmauth can [enforce adding headers](https://docs.victoriametrics.com/victoriametrics/vmauth/#modifying-http-headers) to all
|
||||
forwarded requests via `headers` param in the config file.
|
||||
forwarded requests via the `headers` parameter in the config file.
|
||||
|
||||
## Adding labels to metrics
|
||||
|
||||
@@ -797,6 +849,12 @@ For example, the following commands spread scrape targets among a cluster of two
|
||||
The `-promscrape.cluster.memberNum` can be set to a StatefulSet pod name when `vmagent` runs in Kubernetes.
|
||||
The pod name must end with a number in the range `0 ... promscrape.cluster.membersCount-1`. For example, `-promscrape.cluster.memberNum=vmagent-0`.
|
||||
|
||||
By default, targets are sharded among `vmagent` instances by all target labels after relabeling.
|
||||
Use `-promscrape.cluster.shardByLabels` {{% available_from "v1.146.0" %}} to shard targets by the specified labels instead.
|
||||
For example, with `-promscrape.cluster.shardByLabels=service`, the targets with the same `service` label value will be scraped by the same `vmagent` instance,
|
||||
which is useful when performing stream aggregation that requires all metrics with the same `service` label value to be processed on the same `vmagent` instance.
|
||||
If none of the specified labels are present in the target labels, then all target labels will be used for sharding.
|
||||
|
||||
By default, each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
|
||||
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
|
||||
start a cluster of three `vmagent` instances, where two `vmagent` instances scrape each target:
|
||||
@@ -928,6 +986,29 @@ vmagent will generate the following persistent queue folders:
|
||||
2_0AAFDF53E314A72A
|
||||
```
|
||||
|
||||
### On-disk persistence and data processing order
|
||||
|
||||
By default, vmagent processes data in FIFO order. If data has been written to the on-disk queue,
|
||||
it must be flushed to the remote storage before newly ingested data can be forwarded there.
|
||||
During long outages, vmagent may accumulate large amounts of data in the file-based queue,
|
||||
which can introduce a significant lag between the moment data is collected by vmagent and the
|
||||
moment it becomes visible at the remote storage.
|
||||
|
||||
This behavior can be changed with the `-remoteWrite.inmemoryQueues` {{% available_from "v1.146.0" %}} command-line flag.
|
||||
When set to a non-zero value, vmagent starts the given number of additional workers,
|
||||
which send only recently ingested data from the in-memory queue, while the workers configured via `-remoteWrite.queues` drain the file-based backlog concurrently.
|
||||
This reduces the delivery lag for fresh samples after remote storage outages or slowdowns. The flag can be set individually per each `-remoteWrite.url`.
|
||||
|
||||
Note that these workers are started in addition to the workers configured via `-remoteWrite.queues`, so the total number of concurrent connections to
|
||||
the remote storage becomes the sum of both flags. Take this into account if the remote storage limits the number of concurrent requests.
|
||||
|
||||
This flag has the following possible limitations:
|
||||
|
||||
* Samples may arrive at the remote storage out of order, since recent data can be delivered before the older backlogged data.
|
||||
Do not use this option if the remote storage doesn't accept out-of-order samples.
|
||||
* Recent data isn't guaranteed to take the fast path: if the in-memory queue is full,
|
||||
newly ingested data is still written to the file-based queue and is delivered in FIFO order by the generic workers.
|
||||
|
||||
### Disabling On-disk persistence
|
||||
|
||||
There are cases when it is better to disable on-disk persistence for pending data on the `vmagent` side:
|
||||
|
||||
@@ -240,6 +240,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info (default 1)
|
||||
-promscrape.cluster.shardByLabels array
|
||||
Optional list of target labels, which will be used for sharding targets among cluster members if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
@@ -435,6 +439,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.inmemoryQueues array
|
||||
The number of additional workers per each -remoteWrite.url, which send only recently ingested data from the in-memory queue, while the file-based queue at -remoteWrite.tmpDataPath is drained by workers configured via -remoteWrite.queues. This reduces delivery lag for fresh samples when the file-based queue contains a backlog accumulated during remote storage outages. (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.keepDanglingQueues
|
||||
Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.
|
||||
-remoteWrite.label array
|
||||
|
||||
@@ -270,7 +270,7 @@ users:
|
||||
url_prefix: "http://victoria-metrics:8428/"
|
||||
```
|
||||
|
||||
JWT tokens must contain a `"vm_access": {}` claim, more on that in [JWT claim-based request templating](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating)
|
||||
The `vm_access` claim is optional starting from {{% available_from "#" %}}: when present it is used for [request templating](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating), and when absent the default tenant `0:0` is assumed for any `vm_access`-based placeholders. Routing can rely solely on other token claims via [JWT claim matching](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-matching).
|
||||
|
||||
For testing, skip signature verification with `skip_verify: true` (not recommended for production).
|
||||
|
||||
@@ -520,7 +520,8 @@ for dynamic URL rewriting based on `vm_access` claim fields.
|
||||
|
||||
`vmauth` can dynamically rewrite{{% available_from "v1.137.0" %}} upstream URLs and request headers using values from the JWT `vm_access` claim.
|
||||
This enables routing different users to different backends or tenants based solely on the JWT token,
|
||||
without maintaining separate user configs per tenant.
|
||||
without maintaining separate user configs per tenant. In addition, a default `vm_access` claim can be defined in the `jwt` section via `default_vm_access_claim` {{% available_from "#" %}}.
|
||||
In this case, if the JWT token doesn't have the `vm_access` claim defined, the value from `default_vm_access_claim` will be used for templating.
|
||||
|
||||
Example: minimal valid JWT. If vm_access is empty, tenant `0:0` is assumed and no additional filters are applied.
|
||||
```json
|
||||
@@ -575,6 +576,28 @@ Placeholders are supported in the following locations:
|
||||
Placeholders are **not** supported in response headers.
|
||||
They are also only valid for JWT-authenticated users — using them in configs for `username`/`password` or `bearer_token` users causes a configuration error.
|
||||
|
||||
Example: default `vm_access` claim:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
- jwt:
|
||||
default_vm_access_claim:
|
||||
metrics_account_id: 10
|
||||
metrics_project_id: 10
|
||||
metrics_extra_filters:
|
||||
- '{instance="sandbox"}'
|
||||
metrics_extra_labels:
|
||||
- team=dev
|
||||
- env=dev
|
||||
public_keys:
|
||||
- |
|
||||
-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA...
|
||||
-----END PUBLIC KEY-----
|
||||
url_prefix: "http://vminsert:8480/insert/{{.MetricsAccountID}}:{{.MetricsProjectID}}/prometheus/?extra_filters={{.MetricsExtraFilters}}&extra_label={{.MetricsExtraLabels}}"
|
||||
```
|
||||
|
||||
|
||||
Example: route requests to the VictoriaMetrics single-node:
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/vmutils-darwin-arm64-v1.146.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.146.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -46,9 +46,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -42,9 +42,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -41,9 +41,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -34,9 +34,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -49,9 +49,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -34,9 +34,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
584
docs/victoriametrics/vmestimator.md
Normal file
584
docs/victoriametrics/vmestimator.md
Normal file
@@ -0,0 +1,584 @@
|
||||
---
|
||||
weight: 12
|
||||
menu:
|
||||
docs:
|
||||
parent: victoriametrics
|
||||
weight: 12
|
||||
title: vmestimator
|
||||
tags:
|
||||
- metrics
|
||||
- cardinality
|
||||
aliases:
|
||||
- /vmestimator.html
|
||||
- /vmestimator/index.html
|
||||
- /vmestimator/
|
||||
---
|
||||
|
||||
`vmestimator` measures metrics cardinality across arbitrary label dimensions and exposes the results as metrics.
|
||||
|
||||
## Why measure?
|
||||
|
||||
Consider a setup where metrics are scraped from dozens of Prometheus targets.
|
||||
One day, a team deploys a new version of their service with a `trace_id` or `user_id` label.
|
||||
Overnight, that job's cardinality explodes from 500 to 500,000 time series.
|
||||
Suddenly, VictoriaMetrics consumes 100x more memory and disk.
|
||||
Ingestion slows down, storage struggles to keep up, and in the worst case becomes unavailable.
|
||||
|
||||
By the time someone gets paged, the damage is already done: indexes are bloated, caches are oversized, and observability across the entire system is affected.
|
||||
|
||||
`vmestimator` continuously tracks cardinality and exposes the estimation results as [metrics](https://github.com/VictoriaMetrics/vmestimator/blob/main/README.md#cardinality-metrics).
|
||||
This allows alerting on cardinality spikes within minutes and identifying the offending job directly from the alert.
|
||||
Instead of discovering the problem after it impacts the infrastructure, it becomes possible to react before it turns into an outage.
|
||||
|
||||
Per-job cardinality tracking is the most actionable use case, but it’s not the only one (see [use cases](https://github.com/VictoriaMetrics/vmestimator/#use-cases)).
|
||||
`vmestimator` can measure cardinality across arbitrary label dimensions,
|
||||
enabling use cases such as per-tenant usage analysis, long-term trend tracking, and capacity planning.
|
||||
|
||||
## Design
|
||||
|
||||
We recommend deploying `vmestimator` close to the metrics source, ideally alongside `vmagent` instances that scrape targets.
|
||||
Each `vmagent` mirrors all ingested metrics into the estimator.
|
||||
|
||||
To reduce overhead, persistent queueing and metadata ingestion can be disabled for the estimator remote write path.
|
||||
It is safe to send metrics from multiple independent `vmagent` instances into a single `vmestimator`.
|
||||
|
||||
Run vmestimator (see [configuration](https://github.com/VictoriaMetrics/vmestimator#configuration)):
|
||||
```bash
|
||||
/path/to/vmestimator -config=streams.yaml # -httpListenAddr=:8490
|
||||
```
|
||||
|
||||
Run vmagent:
|
||||
```bash
|
||||
/path/to/vmagent \
|
||||
-remoteWrite.url=http://127.0.0.1:8428/api/v1/write \
|
||||
-remoteWrite.url=http://127.0.0.1:8490/cardinality/api/v1/write \
|
||||
-remoteWrite.disableOnDiskQueue=false,true \
|
||||
-remoteWrite.disableMetadata=false,true
|
||||
```
|
||||
|
||||
The next step is to expose cardinality estimates as metrics.
|
||||
For this, `vmagent` should scrape the estimator `/metrics` endpoint and forward those metrics to a `vmsingle` instance (or another VictoriaMetrics storage).
|
||||
|
||||
<img style="min-width:0;width: 100%" src="https://github.com/user-attachments/assets/e52d9210-b6f9-457b-8d8f-1d6ff6ba1416" />
|
||||
|
||||
This setup is straightforward and introduces minimal overhead.
|
||||
The main drawback is that cardinality data shares the same storage with production metrics.
|
||||
If that storage becomes unavailable, the visibility into cardinality is lost precisely when it may be most needed.
|
||||
|
||||
To mitigate this, we recommend running a separate `vmsingle` instance dedicated to scraping and storing VictoriaMetrics-related monitoring signals only.
|
||||
This pattern is commonly referred to as a monitoring-of-monitoring (MoM) setup.
|
||||
In this architecture, `vmestimator` metrics are isolated from production observability storage,
|
||||
ensuring cardinality visibility remains available even during incidents affecting the primary monitoring system.
|
||||
|
||||
The resulting topology looks like this:
|
||||
<img style="min-width:0;width: 100%" src="https://github.com/user-attachments/assets/e2ca4a69-e931-47a1-9d91-99749382d4a9" />
|
||||
|
||||
## Install
|
||||
|
||||
Create a `streams.yaml` from [example config](https://github.com/VictoriaMetrics/vmestimator/blob/main/streams.yaml).
|
||||
Run the Docker image from [Docker Hub](https://hub.docker.com/r/victoriametrics/vmestimator) or [Quay](https://quay.io/repository/victoriametrics/vmestimator), mounting your config file:
|
||||
```bash
|
||||
docker run --rm \
|
||||
-p 8490:8490 \
|
||||
-v /path/to/streams.yaml:/streams.yaml \
|
||||
docker.io/victoriametrics/vmestimator:latest \
|
||||
-config=/streams.yaml
|
||||
```
|
||||
|
||||
See [Use Cases](https://github.com/VictoriaMetrics/vmestimator#use-cases) for more configuration examples and
|
||||
[Command-line flags](https://github.com/VictoriaMetrics/vmestimator#command-line-flags) for all available options.
|
||||
|
||||
To build from sources, see [How to build from sources](https://github.com/VictoriaMetrics/vmestimator#how-to-build-from-sources).
|
||||
|
||||
## Configuration
|
||||
|
||||
To run vmestimator, a `streams.yaml` config has to be provided (see [example config](https://github.com/VictoriaMetrics/vmestimator/blob/main/streams.yaml)):
|
||||
|
||||
```bash
|
||||
/path/to/vmestimator -config=streams.yaml # -httpListenAddr=:8490
|
||||
```
|
||||
|
||||
Config reference:
|
||||
```yaml
|
||||
streams:
|
||||
-
|
||||
# The measurement window: how long unique series are retained before the HLL sketch resets.
|
||||
# Increases are always reflected immediately. Interval only controls how fast the estimate
|
||||
# drops after previously seen series disappear.
|
||||
#
|
||||
# Running two streams with different intervals (e.g. 5m and 1h) lets you derive churn rate
|
||||
# by comparing their estimates. See Use Cases -> Churn calculation
|
||||
#
|
||||
# default: 5m
|
||||
interval: 'golang duration'
|
||||
|
||||
# Label names used to split the cardinality estimate into per-combination groups.
|
||||
# Each distinct combination of values for these labels gets its own estimate metric.
|
||||
# Omit entirely for a single global estimate across all series.
|
||||
# Examples:
|
||||
# - ["job"]
|
||||
# - ["__name__"]
|
||||
# - ["vm_account_id","vm_project_id"]
|
||||
#
|
||||
# default: none (single global estimate)
|
||||
group_by: 'string array'
|
||||
|
||||
# Maximum number of distinct groups (HLL sketches) to track.
|
||||
# Once the limit is reached, excess groups are counted in a single shared "rejected" sketch
|
||||
# rather than getting their own entry. Acts as a memory cap and a safeguard against OOM
|
||||
# when the group_by label values grow unboundedly.
|
||||
# Memory upper bound per stream:
|
||||
# group_limit * 2^hll_precision bytes.
|
||||
#
|
||||
# default: 10000
|
||||
group_limit: 'integer'
|
||||
|
||||
# Number of shards used to reduce lock contention during parallel ingestion.
|
||||
# Slightly increases memory for global streams (no group_by); negligible otherwise.
|
||||
# Leave at the default unless you have profiled lock contention or have a specific reason to change it.
|
||||
#
|
||||
# default: min(64, 2*availableCPUs)
|
||||
buckets: 'integer'
|
||||
|
||||
# HyperLogLog precision p, in range [4..18].
|
||||
# Determines the number of registers m = 2^p and the relative error 1.04 / sqrt(m):
|
||||
# p=14 → m=16 384, error ~0.81%, memory ~16 KB per sketch (default, suits most cases)
|
||||
# p=18 → m=262 144, error ~0.20%, memory ~256 KB per sketch (billing-grade accuracy)
|
||||
# p=10 → m=1 024, error ~3.25%, memory ~1 KB per sketch (thousands of groups, memory-tight)
|
||||
# See more in https://research.google.com/pubs/archive/40671.pdf
|
||||
#
|
||||
# default: 14
|
||||
hll_precision: 'integer'
|
||||
|
||||
# Whether to use the sparse HyperLogLog representation for low-cardinality groups.
|
||||
# Sparse mode uses far less memory until a group's cardinality reaches ~2^(p-1),
|
||||
# at which point it automatically promotes to the dense representation.
|
||||
# See more in https://research.google.com/pubs/archive/40671.pdf
|
||||
#
|
||||
# default: true
|
||||
hll_sparse: 'boolean'
|
||||
|
||||
# Static labels attached to every output metric produced by this stream entry.
|
||||
# Useful when multiple vmestimator instances feed the same storage and you need
|
||||
# to distinguish their estimates in dashboards and alerts.
|
||||
labels: 'map key string: value string'
|
||||
```
|
||||
|
||||
## Cardinality Metrics
|
||||
|
||||
Cardinality estimates are exposed as the `cardinality_estimate` metric.
|
||||
All metrics include `interval`, `group_by_keys`, `group_by_values`, and any static labels defined in the stream config.
|
||||
|
||||
For global estimates (no `group_by` configured), `group_by_keys` is `__global__` and `group_by_values` is omitted:
|
||||
```
|
||||
cardinality_estimate{interval="1h0m0s",group_by_keys="__global__"} 142300
|
||||
```
|
||||
|
||||
For grouped estimates, one summary line shows the total number of distinct groups `group_by_keys="__group__"`, followed by one line per distinct label value combination.
|
||||
Each per-group line also includes individual `by_{key}="{val}"` labels:
|
||||
```
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="__group__",group_by_values="instance,job"} 2
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="instance,job",group_by_values="host1:9090,prometheus",by_instance="host1:9090",by_job="prometheus"} 312
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="instance,job",group_by_values="host2:9100,node",by_instance="host2:9100",by_job="node"} 87
|
||||
```
|
||||
|
||||
Note: the total distinct group count in the summary line may exceed the number of per-group lines when `group_limit` is reached
|
||||
and excess groups are counted in a single shared "rejected" sketch rather than getting their own entry.
|
||||
|
||||
By default, cardinality estimates are merged with the estimator's operational metrics and exposed at `/metrics`.
|
||||
This is controlled by the `-cardinalityMetrics.exposeAt` flag:
|
||||
- `-cardinalityMetrics.exposeAt=/metrics` (default): cardinality metrics merged with operational metrics at `/metrics`
|
||||
- `-cardinalityMetrics.exposeAt=/cardinality/metrics`: cardinality metrics exposed at separate path
|
||||
- `-cardinalityMetrics.exposeAt=`: cardinality metrics not exposed via HTTP
|
||||
|
||||
Computing cardinality estimates is expensive, so results are cached.
|
||||
Cache duration is controlled by `-cardinalityMetrics.cacheTTL` (default: `30s`).
|
||||
Set to `0` to disable caching entirely.
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Basic
|
||||
|
||||
Global cardinality:
|
||||
```yaml
|
||||
# streams.yaml
|
||||
|
||||
- interval: '5m'
|
||||
```
|
||||
|
||||
Per metric name cardinality:
|
||||
```yaml
|
||||
# streams.yaml
|
||||
|
||||
- interval: '5m'
|
||||
group_by: ['__name__']
|
||||
```
|
||||
|
||||
Per job label cardinality:
|
||||
```yaml
|
||||
# streams.yaml
|
||||
|
||||
- interval: '5m'
|
||||
group_by: ['job']
|
||||
```
|
||||
|
||||
Per tenant cardinality:
|
||||
```yaml
|
||||
# streams.yaml
|
||||
|
||||
- interval: '5m'
|
||||
group_by: ['vm_account_id', 'vm_project_id']
|
||||
```
|
||||
|
||||
### Churn calculation
|
||||
|
||||
[Churn rate](https://valyala.medium.com/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48#churn-rate) measures how quickly time series are created and disappear.
|
||||
[High churn](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate) means many series appear briefly and are replaced by new ones.
|
||||
This puts pressure on storage, because each new series must be indexed regardless of how short its lifetime is.
|
||||
|
||||
To measure churn, configure two streams with the same `group_by` but different intervals — a short one (`15m`) and a long one (`30m`):
|
||||
```yaml
|
||||
# streams.yaml
|
||||
|
||||
- interval: '15m'
|
||||
group_by: ['job']
|
||||
|
||||
- interval: '30m'
|
||||
group_by: ['job']
|
||||
```
|
||||
|
||||
When churn is low, both estimates are roughly equal.
|
||||
When churn is high, the `30m` estimate grows significantly larger than the `15m` estimate, because the long window accumulates series that have already disappeared.
|
||||
|
||||
The following query computes the churn ratio per job:
|
||||
```
|
||||
(
|
||||
sum(
|
||||
max(cardinality_estimate{group_by_keys="job",interval="30m0s"}) without (instance)
|
||||
) by (group_by_keys,group_by_values)
|
||||
-
|
||||
sum(
|
||||
max(cardinality_estimate{group_by_keys="job",interval="15m0s"}) without (instance)
|
||||
) by (group_by_keys,group_by_values)
|
||||
)
|
||||
/
|
||||
sum(
|
||||
max(cardinality_estimate{group_by_keys="job",interval="30m0s"}) without (instance)
|
||||
) by (group_by_keys,group_by_values) * 100
|
||||
```
|
||||
|
||||
A result near `0` means the series set is stable: the same series were active throughout the entire 30-minute window.
|
||||
A result near `1` means complete churn: entirely different series appeared in each 15-minute window.
|
||||
Values in between indicate the fraction of maximum possible churn that is occurring.
|
||||
|
||||
This helps identify jobs that create the most indexing pressure on storage, even when their current active cardinality appears moderate.
|
||||
|
||||
### Alerting
|
||||
|
||||
Pre-built alert rules for cardinality monitoring are available in
|
||||
[deployment/docker/rules/alerts-cardinality.yml](https://github.com/VictoriaMetrics/vmestimator/blob/main/deployment/docker/rules/alerts-cardinality.yml).
|
||||
|
||||
They require two streams with the same `group_by` but different intervals to also support churn detection:
|
||||
```yaml
|
||||
# streams.yaml
|
||||
# or use example config:
|
||||
# https://github.com/VictoriaMetrics/vmestimator/blob/main/streams.yaml
|
||||
|
||||
- interval: '15m'
|
||||
group_by: ['job']
|
||||
|
||||
- interval: '30m'
|
||||
group_by: ['job']
|
||||
```
|
||||
|
||||
The included alerts are:
|
||||
|
||||
- **JobTooHighCardinality** — fires when any job exceeds 20,000 estimated active series over the last 30 minutes.
|
||||
The threshold is a starting point and should be calibrated to reflect the expected cardinality of your largest jobs.
|
||||
|
||||
- **JobTooHighChurnRate** — fires when more than 10% of a job's series churned between the 15m and 30m windows.
|
||||
Catches jobs that generate continuous indexing pressure even when their active series count looks moderate.
|
||||
|
||||
- **CardinalityGroupLimitNearlyReached** — fires when the number of tracked groups exceeds 80% of the configured `group_limit`.
|
||||
Acts as an early warning that some label value combinations may soon be dropped from individual tracking.
|
||||
|
||||
- **CardinalityGroupLimitReached** — fires when groups are actively rejected because `group_limit` is full.
|
||||
At this point, some label combinations are being counted in a shared "rejected" sketch rather than tracked individually.
|
||||
|
||||
All alerts link to the [Cardinality Explorer dashboard](https://play-grafana.victoriametrics.com/d/mktd5h8/).
|
||||
|
||||
## Alternative solutions
|
||||
|
||||
### PromQL
|
||||
|
||||
Cardinality can be estimated with PromQL.
|
||||
|
||||
Global cardinality:
|
||||
```
|
||||
count({__name__=~".*"})
|
||||
```
|
||||
|
||||
Top ten metric names by cardinality:
|
||||
```
|
||||
topk(10, count({__name__=~".*"}) by (__name__))
|
||||
```
|
||||
|
||||
Top ten jobs by cardinality:
|
||||
```
|
||||
topk(10, count({__name__=~".*"}) by (job))
|
||||
```
|
||||
|
||||
This approach works for small setups but does not scale well, because these queries scan the entire time series set.
|
||||
Most critically, if the storage is overloaded or unavailable, these queries cannot be executed.
|
||||
|
||||
### Cardinality Explorer
|
||||
|
||||
VictoriaMetrics includes a built-in [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer).
|
||||
It provides per-metric detail beyond raw series counts: query frequency, last access time, day-over-day change, and share of total cardinality.
|
||||
It is well suited for in-depth, ad-hoc investigation.
|
||||
For example, finding metrics that are high-cardinality but rarely queried,
|
||||
so they can be [dropped via relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-drop-metrics-during-scrape) or have their cardinality reduced with [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/).
|
||||
|
||||
Both tools serve different purposes and work well together.
|
||||
Use `vmestimator` for continuous monitoring, alerting, and cross-cluster cardinality tracking.
|
||||
Use the cardinality explorer when you need to drill into a specific metric or label and understand what is driving its cardinality.
|
||||
|
||||
## Cluster
|
||||
|
||||
`vmestimator` supports a clustered deployment for high availability or when CPU on a single instance becomes a limiting factor.
|
||||
|
||||
Instances are split into two roles: **storage nodes** accept Prometheus remote write and maintain local HyperLogLog sketches; **selector nodes** query all storage nodes, merge their sketches, and expose a unified cardinality estimate. Cardinality estimate results should be scraped from selector nodes.
|
||||
|
||||
<img style="min-width:0;width: 100%" src="https://github.com/user-attachments/assets/846e5f77-378a-44dc-a4c8-2a1c64eca9d8" />
|
||||
|
||||
**Storage nodes:**
|
||||
```
|
||||
vmestimator -config=streams.yaml -httpListenAddr=:8491 -cardinalityMetrics.exposeAt=/cardinality/metrics
|
||||
vmestimator -config=streams.yaml -httpListenAddr=:8492 -cardinalityMetrics.exposeAt=/cardinality/metrics
|
||||
vmestimator -config=streams.yaml -httpListenAddr=:8493 -cardinalityMetrics.exposeAt=/cardinality/metrics
|
||||
```
|
||||
|
||||
**Selector nodes:**
|
||||
```
|
||||
vmestimator -storageNode=http://vmestimator-storage-1:8491 \
|
||||
-storageNode=http://vmestimator-storage-2:8492 \
|
||||
-storageNode=http://vmestimator-storage-3:8493 \
|
||||
-httpListenAddr=:8490
|
||||
```
|
||||
|
||||
Setting `-cardinalityMetrics.exposeAt=/cardinality/metrics` on storage nodes keeps per-node estimates off the default `/metrics` path. The `/metrics` endpoint then returns only operational metrics, while `/cardinality/metrics` exposes the node's local estimate — useful for inspecting or debugging a specific node.
|
||||
|
||||
A selector with `-storageNode` flags and no `-config` runs without local estimators and only merges remote data.
|
||||
|
||||
When multiple selector nodes are scraped, each returns a fully merged estimate.
|
||||
Deduplicate at query time to avoid overcounting:
|
||||
```
|
||||
max(cardinality_estimate) without (instance)
|
||||
```
|
||||
|
||||
## Operational metrics
|
||||
|
||||
When grouping is enabled, vmestimator exposes per-bucket operational metrics at `/metrics`:
|
||||
|
||||
- `vmestimator_estimator_group_size{group_by_keys, bucket}` — number of active groups in this bucket after the last rotation
|
||||
- `vmestimator_estimator_group_rejected_size{group_by_keys}` — estimated number of distinct group values rejected since the last rotation because `group_limit` was reached
|
||||
- `vmestimator_estimator_group_limit{group_by_keys, bucket}` — configured `group_limit` for this bucket
|
||||
|
||||
|
||||
## Dashboards
|
||||
|
||||
Two Grafana dashboards are available in the [dashboards](https://github.com/VictoriaMetrics/vmestimator/tree/main/dashboards) directory:
|
||||
|
||||
- [VictoriaMetrics - vmestimator](https://play-grafana.victoriametrics.com/d/mkv22l4/victoriametrics-vmestimator) — application health: CPU, memory, ingestion rates, concurrent inserts, and group key saturation.
|
||||
<img width="1507" height="801" alt="Screenshot 2026-06-29 at 19 06 46" src="https://github.com/user-attachments/assets/cbfd979d-f403-4270-b098-2d2f0b392172" />
|
||||
|
||||
- [VictoriaMetrics - Cardinality Explorer](https://play-grafana.victoriametrics.com/d/mktd5h8/victoriametrics-cardinality-explorer) — cardinality analysis: global estimates, per-group-key series counts, and top-10 highest-cardinality label value combinations.
|
||||
<img width="1510" height="796" alt="Screenshot 2026-06-29 at 19 05 47" src="https://github.com/user-attachments/assets/a1aea6e1-8714-4d5a-a629-8bdee978f1c6" />
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use the [docker images](https://hub.docker.com/r/victoriametrics/vmestimator).
|
||||
|
||||
Development build:
|
||||
1. [Install Go](https://golang.org/doc/install).
|
||||
1. Run `make vmestimator` from the root folder of [the repository](https://github.com/VictoriaMetrics/vmestimator).
|
||||
It builds `vmestimator` binary and places it into the `bin` folder.
|
||||
|
||||
Production build:
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
1. Run `make vmestimator-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/vmestimator).
|
||||
It builds `vmestimator-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
Building docker images:
|
||||
|
||||
Run `make package-vmestimator`. It builds `victoriametrics/vmestimator:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmestimator`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image by setting it via `<ROOT_IMAGE>` environment variable.
|
||||
For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```sh
|
||||
ROOT_IMAGE=scratch make package-vmestimator
|
||||
```
|
||||
|
||||
You can build and publish to your own registry and namespace:
|
||||
```
|
||||
DOCKER_REGISTRIES=ghcr.io DOCKER_NAMESPACE=foo make publish-vmestimator
|
||||
```
|
||||
|
||||
## Command-line flags
|
||||
|
||||
Run `vmestimator -help` in order to see all the available options:
|
||||
|
||||
```
|
||||
Usage of ./bin/vmestimator:
|
||||
-cardinalityMetrics.cacheTTL duration
|
||||
Duration for caching cardinality metrics response (default 30s)
|
||||
-cardinalityMetrics.exposeAt string
|
||||
HTTP path for exposing cardinality metrics. If set to the default /metrics, cardinality metrics are merged with regular metrics and exposed together. If set to a different path, only cardinality metrics are exposed at that endpoint. If set to an empty value, cardinality metrics are not exposed via HTTP at all. (default "/metrics")
|
||||
-config string
|
||||
Path to YAML configuration file. Must be set unless -storageNode is specified. See https://github.com/VictoriaMetrics/vmestimator/blob/main/streams.yaml for config example
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey value
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-fs.maxConcurrency int
|
||||
The maximum number of concurrent goroutines to work with files; smaller values may help reducing Go scheduling latency on systems with small number of CPU cores; higher values may help reducing data ingestion latency on systems with high-latency storage such as NFS or Ceph (default 160)
|
||||
-http.connTimeout duration
|
||||
Incoming connections to -httpListenAddr are closed after the configured timeout. This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections (default 2m0s)
|
||||
-http.disableCORS
|
||||
Disable CORS for all origins (*)
|
||||
-http.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header, recommended: "default-src 'self'"
|
||||
-http.header.disableServerHostname
|
||||
Whether to disable 'X-Server-Hostname' header in HTTP responses
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password value
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
Flag value can be read from the given file when using -httpAuth.password=file:///abs/path/to/file or -httpAuth.password=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -httpAuth.password=http://host/path or -httpAuth.password=https://host/path
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr array
|
||||
TCP address to listen for incoming HTTP requests
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' are the prefix and suffix of the arg, each with a length not exceeding -loggerMaxArgLen / 2 (default 5000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Set higher value when clients send data over slow networks. Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage. See also -insert.maxQueueDuration (default 20)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage. The process may behave unexpectedly if this flag is set too small (e.g., 1 byte).
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey value
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
|
||||
-pprofAuthKey value
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-secret.flags array
|
||||
Comma-separated list of flag names with secret values. Values for these flags are hidden in logs and on /metrics page
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-storageNode array
|
||||
HTTP URLs of remote vmestimator nodes to query for cardinality snapshots, e.g. http://vmestimator-2:8490
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tls array
|
||||
Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -httpListenAddr if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -httpListenAddr if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsMinVersion array
|
||||
Optional minimum TLS version to use for the corresponding -httpListenAddr if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
|
||||
6
go.mod
6
go.mod
@@ -7,11 +7,11 @@ require (
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.14.0
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.8.0
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.51.1-0.20260624061259-dc94972a8708
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3
|
||||
github.com/VictoriaMetrics/metrics v1.43.2
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1
|
||||
github.com/VictoriaMetrics/metrics v1.44.0
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2
|
||||
github.com/aws/aws-sdk-go-v2 v1.42.0
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.25
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.27
|
||||
|
||||
14
go.sum
14
go.sum
@@ -52,18 +52,16 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.57.0/go.mod h1:YqwkQPrWSC7+byyc1VlKbWLBF5JsW5IoL6xUkemYSXk=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0 h1:2x1Tszv41PnCdSMumEtejz/On1RQ45kHQ+hhKT53sOk=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0/go.mod h1:fQtmzaSUL+HJmHozeAKmnTJTOMBT+vBccv/VWQEwhUQ=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3 h1:3eP8RRZitbga5EYiQ3IANrMPxpBwMAX4VA6akDaXwpU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3/go.mod h1:H4sDxcvk6OmC6zOt++IlDyrwfbn4F1eSLwMpR+kpRt8=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.51.1-0.20260624061259-dc94972a8708 h1:D9/Jzlm3B8PBnrWxg4ft8KYZdG607dV3lpBfPCoiJD8=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.51.1-0.20260624061259-dc94972a8708/go.mod h1:H4sDxcvk6OmC6zOt++IlDyrwfbn4F1eSLwMpR+kpRt8=
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0 h1:FJT9uNXA2isppFuJErbLqD306KoFlehl7Wn2dg/6oIE=
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3 h1:rBabE0iIxcqKEMCwUmwHZ9dgEqXerg8FRbRDUvC7OVc=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3/go.mod h1:hHXhl4DA2fTL2HTZDJFXWgW0LNjo6B+4aj2Wmng3TjU=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2 h1:+8pIQEGwchKS5CYFyvv3LKvNXGi7baZ9hmIV4RHqibY=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1 h1:GdIblCDgXsrBJcBSDtFT8SLK7P+QHijdQmcr4L/f0Go=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metrics v1.44.0 h1:Fr8yqQSV+ZfYaDD/anqk1E8e9YPgfleSleJmAI0M0Tw=
|
||||
github.com/VictoriaMetrics/metrics v1.44.0/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2 h1:7OsrcDBWREWKqqpnFyIUEOM4FNv2qHvCoww2GYz3Tc0=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0=
|
||||
|
||||
@@ -91,6 +91,11 @@ func (r *Restore) Run(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot list src parts: %w", err)
|
||||
}
|
||||
for _, srcPart := range srcParts {
|
||||
if !srcPart.IsLocalPathInsideDir(r.Dst.Dir) {
|
||||
return fmt.Errorf("part file %s would be written outside storage directory %s", srcPart.Path, r.Dst.Dir)
|
||||
}
|
||||
}
|
||||
logger.Infof("obtaining list of parts at %s", dst)
|
||||
dstParts, err := dst.ListParts()
|
||||
if err != nil {
|
||||
|
||||
@@ -120,6 +120,17 @@ func (p *Part) ParseFromRemotePath(remotePath string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// IsLocalPathInsideDir returns true if the part's local path resolves inside dir.
|
||||
// It resolves ../../ sequences and prevents path traversal outside dir.
|
||||
func (p *Part) IsLocalPathInsideDir(dir string) bool {
|
||||
dir = filepath.Clean(dir)
|
||||
if dir == `/` {
|
||||
return true
|
||||
}
|
||||
|
||||
return strings.HasPrefix(p.LocalPath(dir), dir+string(filepath.Separator))
|
||||
}
|
||||
|
||||
// MaxPartSize is the maximum size for each part.
|
||||
//
|
||||
// The MaxPartSize reduces bandwidth usage during retries on network errors
|
||||
|
||||
54
lib/backup/common/part_test.go
Normal file
54
lib/backup/common/part_test.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIsLocalPathInsideDir(t *testing.T) {
|
||||
f := func(dir, path string, expected bool) {
|
||||
t.Helper()
|
||||
p := Part{Path: path}
|
||||
if got := p.IsLocalPathInsideDir(dir); got != expected {
|
||||
t.Fatalf("IsLocalPathInsideDir(%q, %q): got %v, want %v", dir, path, got, expected)
|
||||
}
|
||||
}
|
||||
|
||||
// normal path inside dir
|
||||
f("/data/storage", "parts/segment1/data.bin", true)
|
||||
|
||||
// dir with trailing slash is normalized
|
||||
f("/data/storage/", "parts/segment1/data.bin", true)
|
||||
|
||||
// deeply nested path
|
||||
f("/data/storage", "a/b/c/d/e/file.dat", true)
|
||||
|
||||
// traversal that stays inside dir
|
||||
f("/data/storage", "foo/../bar/file.dat", true)
|
||||
|
||||
// root dir allows any path
|
||||
f("/", "any/path/here", true)
|
||||
|
||||
// root dir allows traversal attempts since nothing is outside /
|
||||
f("/", "../outside/marker.txt", true)
|
||||
|
||||
// path with leading slash is treated as relative by filepath.Join and stays inside dir
|
||||
f("/data/storage", "/outside/marker.txt", true)
|
||||
|
||||
// dir with .. components is normalized; path inside resolved dir
|
||||
f("/data/storage/../foo", "parts/file.dat", true)
|
||||
|
||||
// dir with .. components is normalized; traversal outside resolved dir
|
||||
f("/data/storage/../foo", "../storage/evil.txt", false)
|
||||
|
||||
// simple traversal
|
||||
f("/data/storage", "../outside/marker.txt", false)
|
||||
|
||||
// traversal with trailing slash in dir
|
||||
f("/data/storage/", "../outside/marker.txt", false)
|
||||
|
||||
// deep traversal
|
||||
f("/data/storage", "a/../../outside/marker.txt", false)
|
||||
|
||||
// sibling directory whose name shares a prefix with dir
|
||||
f("/data/storage", "../storagefoo/evil.txt", false)
|
||||
}
|
||||
@@ -129,6 +129,10 @@ func (fs *FS) NewReadCloser(p common.Part) (io.ReadCloser, error) {
|
||||
// On platforms with preallocation, writes go to a .tmp file that must be
|
||||
// finalized with FinalizeFile.
|
||||
func (fs *FS) NewDirectWriteCloser(p common.Part) (io.WriteCloser, error) {
|
||||
if !p.IsLocalPathInsideDir(fs.Dir) {
|
||||
logger.Fatalf("BUG: part file %s would be written outside storage directory %s", p.Path, fs.Dir)
|
||||
}
|
||||
|
||||
path := fs.writePath(p)
|
||||
if err := fs.mkdirAll(path); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -58,10 +58,13 @@ var (
|
||||
|
||||
disableKeepAlive = flag.Bool("http.disableKeepAlive", false, "Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr")
|
||||
disableResponseCompression = flag.Bool("http.disableResponseCompression", false, "Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth")
|
||||
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
|
||||
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
|
||||
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
|
||||
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, "Incoming connections to -httpListenAddr are closed after the configured timeout. "+
|
||||
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, "The maximum duration for a graceful shutdown of the HTTP server. "+
|
||||
"During this period the server stops accepting new connections, but it will continue serving existing connections. "+
|
||||
"The remaining in-flight requests are canceled before the deadline, so the shutdown can finish within this duration. "+
|
||||
"A highly loaded server may require increased value for a graceful shutdown")
|
||||
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
|
||||
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
|
||||
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, "Incoming connections to -httpListenAddr are closed after the configured timeout. "+
|
||||
"This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections")
|
||||
|
||||
headerHSTS = flag.String("http.header.hsts", "", "Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'")
|
||||
@@ -80,6 +83,7 @@ var (
|
||||
type server struct {
|
||||
shutdownDelayDeadline atomic.Int64
|
||||
s *http.Server
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// RequestHandler must serve the given request r and write response to w.
|
||||
@@ -156,7 +160,11 @@ func serve(addr string, rh RequestHandler, idx int, opts ServeOptions) {
|
||||
func serveWithListener(addr string, ln net.Listener, rh RequestHandler, disableBuiltinRoutes bool) {
|
||||
var s server
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
s.s = &http.Server{
|
||||
BaseContext: func(l net.Listener) context.Context {
|
||||
return ctx
|
||||
},
|
||||
|
||||
// Disable http/2, since it doesn't give any advantages for VictoriaMetrics services.
|
||||
TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
|
||||
@@ -170,6 +178,7 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler, disableB
|
||||
ErrorLog: log.New(&tlsErrorSkipLogger{}, "", 0),
|
||||
}
|
||||
s.s.SetKeepAlivesEnabled(!*disableKeepAlive)
|
||||
s.cancel = cancel
|
||||
if *connTimeout > 0 {
|
||||
s.s.ConnContext = func(ctx context.Context, _ net.Conn) context.Context {
|
||||
timeoutSec := connTimeout.Seconds()
|
||||
@@ -265,8 +274,18 @@ func stop(addr string) error {
|
||||
logger.Infof("Starting shutdown for http server %q", addr)
|
||||
}
|
||||
|
||||
// Cancel in-flight requests shortly before the deadline, reserving up to 2s (or 20%
|
||||
// of the window, whichever is smaller) for them to unwind, so Shutdown returns cleanly
|
||||
// within -http.maxGracefulShutdownDuration instead of timing out and dying via
|
||||
// logger.Fatalf -> os.Exit, which skips the storage flush and loses data.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/1502
|
||||
cancelInflightAfter := *maxGracefulShutdownDuration - min(*maxGracefulShutdownDuration/5, 2*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), *maxGracefulShutdownDuration)
|
||||
defer cancel()
|
||||
|
||||
t := time.AfterFunc(cancelInflightAfter, s.cancel)
|
||||
defer t.Stop()
|
||||
|
||||
if err := s.s.Shutdown(ctx); err != nil {
|
||||
return fmt.Errorf("cannot gracefully shutdown http server at %q in %.3fs; "+
|
||||
"probably, `-http.maxGracefulShutdownDuration` command-line flag value must be increased; error: %s", addr, maxGracefulShutdownDuration.Seconds(), err)
|
||||
|
||||
@@ -105,6 +105,10 @@ type body struct {
|
||||
Scope string `json:"scope,omitempty"`
|
||||
vmAccessClaim VMAccessClaim
|
||||
|
||||
// hasVMAccess is set to true when the token body contains a `vm_access` claim.
|
||||
// Presence enforcement is left to the caller via Token.HasVMAccessClaim.
|
||||
hasVMAccess bool
|
||||
|
||||
buf []byte
|
||||
p *fastjson.Parser
|
||||
|
||||
@@ -121,7 +125,6 @@ type body struct {
|
||||
}
|
||||
|
||||
func (b *body) parse(src string) error {
|
||||
|
||||
var err error
|
||||
b.buf, err = decodeB64(b.buf[:0], src)
|
||||
if err != nil {
|
||||
@@ -132,6 +135,9 @@ func (b *body) parse(src string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if jv.Type() != fastjson.TypeObject {
|
||||
return fmt.Errorf("unexpected non json object; type: %q", jv.Type())
|
||||
}
|
||||
if expObject := jv.Get("exp"); expObject != nil {
|
||||
b.Exp, err = expObject.Int64()
|
||||
if err != nil {
|
||||
@@ -153,30 +159,31 @@ func (b *body) parse(src string) error {
|
||||
}
|
||||
|
||||
vaObject := jv.Get("vm_access")
|
||||
if vaObject == nil {
|
||||
return ErrVMAccessFieldMissing
|
||||
}
|
||||
// some IDPs encode custom claims as a string
|
||||
// try parsing as an object and fallback to a string
|
||||
switch vaObject.Type() {
|
||||
case fastjson.TypeObject:
|
||||
if err := b.vmAccessClaim.parseFrom(vaObject); err != nil {
|
||||
return err
|
||||
}
|
||||
case fastjson.TypeString:
|
||||
b.claimsParser = parserPool.Get()
|
||||
va, err := b.claimsParser.ParseBytes(vaObject.GetStringBytes())
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `vm_access` string json: %w", err)
|
||||
}
|
||||
if err := b.vmAccessClaim.parseFrom(va); err != nil {
|
||||
return fmt.Errorf("cannot parse `vm_access` values from string json: %w", err)
|
||||
}
|
||||
b.vmAccessClaimObject = va
|
||||
case fastjson.TypeNull:
|
||||
return ErrVMAccessFieldMissing
|
||||
switch {
|
||||
case vaObject == nil || vaObject.Type() == fastjson.TypeNull:
|
||||
b.hasVMAccess = false
|
||||
default:
|
||||
return fmt.Errorf("unexpected type for `vm_access` field; got: %q, want object {}", vaObject.Type())
|
||||
// some IDPs encode custom claims as a string
|
||||
// try parsing as an object and fallback to a string
|
||||
switch vaObject.Type() {
|
||||
case fastjson.TypeObject:
|
||||
if err := b.vmAccessClaim.parseFrom(vaObject); err != nil {
|
||||
return err
|
||||
}
|
||||
case fastjson.TypeString:
|
||||
b.claimsParser = parserPool.Get()
|
||||
va, err := b.claimsParser.ParseBytes(vaObject.GetStringBytes())
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `vm_access` string json: %w", err)
|
||||
}
|
||||
if err := b.vmAccessClaim.parseFrom(va); err != nil {
|
||||
return fmt.Errorf("cannot parse `vm_access` values from string json: %w", err)
|
||||
}
|
||||
b.vmAccessClaimObject = va
|
||||
default:
|
||||
return fmt.Errorf("unexpected type for `vm_access` field; got: %q, want object {}", vaObject.Type())
|
||||
}
|
||||
b.hasVMAccess = true
|
||||
}
|
||||
b.Jti = bytesutil.ToUnsafeString(jv.GetStringBytes("jti"))
|
||||
|
||||
@@ -218,6 +225,7 @@ func (b *body) reset() {
|
||||
b.buf = b.buf[:0]
|
||||
b.allClaims = nil
|
||||
b.vmAccessClaim.reset()
|
||||
b.hasVMAccess = false
|
||||
if b.p != nil {
|
||||
parserPool.Put(b.p)
|
||||
b.p = nil
|
||||
@@ -229,11 +237,9 @@ func (b *body) reset() {
|
||||
if b.vmAccessClaimObject != nil {
|
||||
b.vmAccessClaimObject = nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Parse parses JWT token from given source string
|
||||
//
|
||||
// Token field is valid until src is reachable
|
||||
func (t *Token) Parse(src string, enforceAuthPrefix bool) error {
|
||||
if enforceAuthPrefix && (len(src) < len(prefix) || !strings.EqualFold(src[:len(prefix)], prefix)) {
|
||||
@@ -268,6 +274,11 @@ func (t *Token) Parse(src string, enforceAuthPrefix bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasVMAccessClaim reports whether the parsed token contains a `vm_access` claim.
|
||||
func (t *Token) HasVMAccessClaim() bool {
|
||||
return t.body.hasVMAccess
|
||||
}
|
||||
|
||||
// Issuer returns `iss` claim value from token body
|
||||
func (t *Token) Issuer() string {
|
||||
return t.body.Iss
|
||||
@@ -371,30 +382,30 @@ func (t *Token) Reset() {
|
||||
|
||||
// VMAccessClaim represents JWT claim object
|
||||
type VMAccessClaim struct {
|
||||
MetricsExtraFilters []string `json:"metrics_extra_filters,omitempty"`
|
||||
MetricsExtraLabels []string `json:"metrics_extra_labels,omitempty"`
|
||||
LogsExtraFilters []string `json:"logs_extra_filters,omitempty"`
|
||||
LogsExtraStreamFilters []string `json:"logs_extra_stream_filters,omitempty"`
|
||||
MetricsExtraFilters []string `json:"metrics_extra_filters,omitempty" yaml:"metrics_extra_filters,omitempty"`
|
||||
MetricsExtraLabels []string `json:"metrics_extra_labels,omitempty" yaml:"metrics_extra_labels,omitempty"`
|
||||
LogsExtraFilters []string `json:"logs_extra_filters,omitempty" yaml:"logs_extra_filters,omitempty"`
|
||||
LogsExtraStreamFilters []string `json:"logs_extra_stream_filters,omitempty" yaml:"logs_extra_stream_filters,omitempty"`
|
||||
|
||||
MetricsAccountID uint32 `json:"metrics_account_id,omitempty"`
|
||||
MetricsProjectID uint32 `json:"metrics_project_id,omitempty"`
|
||||
MetricsAccountID uint32 `json:"metrics_account_id,omitempty" yaml:"metrics_account_id,omitempty"`
|
||||
MetricsProjectID uint32 `json:"metrics_project_id,omitempty" yaml:"metrics_project_id,omitempty"`
|
||||
|
||||
LogsAccountID uint32 `json:"logs_account_id,omitempty"`
|
||||
LogsProjectID uint32 `json:"logs_project_id,omitempty"`
|
||||
LogsAccountID uint32 `json:"logs_account_id,omitempty" yaml:"logs_account_id,omitempty"`
|
||||
LogsProjectID uint32 `json:"logs_project_id,omitempty" yaml:"logs_project_id,omitempty"`
|
||||
|
||||
// Properties below are deprecated and retained only for compatibility with vmgateway, which is itself deprecated.
|
||||
|
||||
// promql filters applied to each select query
|
||||
// Deprecated
|
||||
ExtraFilters []string `json:"extra_filters,omitempty"`
|
||||
ExtraFilters []string `json:"extra_filters,omitempty" yaml:"-"`
|
||||
// Deprecated
|
||||
Tenant TenantID `json:"tenant_id"`
|
||||
Tenant TenantID `json:"tenant_id" yaml:"-"`
|
||||
// role can be defined as 1 = read, 2 = write, 3 = read and write
|
||||
// 0 = unconfigured - read and write
|
||||
// Deprecated
|
||||
Mode int `json:"mode,omitempty"`
|
||||
Mode int `json:"mode,omitempty" yaml:"-"`
|
||||
// Deprecated
|
||||
Labels []string `json:"extra_labels,omitempty"`
|
||||
Labels []string `json:"extra_labels,omitempty" yaml:"-"`
|
||||
// labelsBuf holds allocated memory for Labels
|
||||
// Deprecated
|
||||
labelsBuf []byte
|
||||
@@ -425,7 +436,6 @@ func (vac *VMAccessClaim) reset() {
|
||||
}
|
||||
|
||||
func (vac *VMAccessClaim) parseFrom(jv *fastjson.Value) error {
|
||||
|
||||
if err := vac.Tenant.parseFrom(jv); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -569,6 +579,9 @@ func NewToken(auth string, enforceAuthPrefix bool) (*Token, error) {
|
||||
if err := t.parse(jwt[0], jwt[1], jwt[2]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !t.body.hasVMAccess {
|
||||
return nil, ErrVMAccessFieldMissing
|
||||
}
|
||||
return &t, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -168,17 +168,10 @@ func TestParseJWTBody_Failure(t *testing.T) {
|
||||
true,
|
||||
)
|
||||
|
||||
// invalid body type json
|
||||
// non-object body type
|
||||
f(
|
||||
`[]`,
|
||||
"missing `vm_access` claim",
|
||||
true,
|
||||
)
|
||||
|
||||
// missing vm_access claim
|
||||
f(
|
||||
`{}`,
|
||||
"missing `vm_access` claim",
|
||||
`unexpected non json object; type: "array"`,
|
||||
true,
|
||||
)
|
||||
|
||||
@@ -189,13 +182,6 @@ func TestParseJWTBody_Failure(t *testing.T) {
|
||||
true,
|
||||
)
|
||||
|
||||
// vm_access claim null
|
||||
f(
|
||||
`{"vm_access": null}`,
|
||||
"missing `vm_access` claim",
|
||||
true,
|
||||
)
|
||||
|
||||
// invalid vm_access: account_id type mismatch
|
||||
f(
|
||||
`{"vm_access": {"tenant_id": {"account_id": "1", "project_id": 5}}}`,
|
||||
@@ -555,6 +541,33 @@ func TestParseJWTBody_Success(t *testing.T) {
|
||||
)
|
||||
}
|
||||
|
||||
func TestParseJWTBody_VMAccessPresence(t *testing.T) {
|
||||
f := func(data string, wantHasVMAccess bool) {
|
||||
t.Helper()
|
||||
|
||||
encodedLen := base64.RawURLEncoding.EncodedLen(len(data))
|
||||
encoded := make([]byte, encodedLen)
|
||||
base64.RawURLEncoding.Encode(encoded, []byte(data))
|
||||
|
||||
var b body
|
||||
if err := b.parse(string(encoded)); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if b.hasVMAccess != wantHasVMAccess {
|
||||
t.Fatalf("unexpected hasVMAccess; got %v; want %v", b.hasVMAccess, wantHasVMAccess)
|
||||
}
|
||||
}
|
||||
|
||||
// vm_access claim is present
|
||||
f(`{"vm_access": {}}`, true)
|
||||
f(`{"vm_access": {"metrics_account_id": 1}}`, true)
|
||||
|
||||
// vm_access claim is absent or null - parsing must succeed with hasVMAccess=false
|
||||
f(`{}`, false)
|
||||
f(`{"vm_access": null}`, false)
|
||||
f(`{"role": "admin"}`, false)
|
||||
}
|
||||
|
||||
func TestNewTokenFromRequest_Failure(t *testing.T) {
|
||||
f := func(r *http.Request) {
|
||||
t.Helper()
|
||||
@@ -866,7 +879,6 @@ func TestNewTokenFromRequest_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTokenMatchClaims(t *testing.T) {
|
||||
|
||||
/*
|
||||
{
|
||||
"iss": "https://login.microsoftonline.com/-6691-4868-a77b-1b0f9bbe5f43/v2.0",
|
||||
|
||||
311
lib/mdx/filter.go
Normal file
311
lib/mdx/filter.go
Normal file
@@ -0,0 +1,311 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
var (
|
||||
vmLabel = flag.String("mdx.label", "", "Optional label value in the form 'name=value' used to identify VictoriaMetrics metrics for MDX. "+
|
||||
"Metrics containing the specified label are forwarded to `-remoteWrite.url` endpoints configured with `-remoteWrite.mdx.enable=true`.")
|
||||
)
|
||||
|
||||
const (
|
||||
vmAppLabelName = "victoriametrics_app"
|
||||
vmAppLabelValue = "true"
|
||||
vmAppVersionMetricName = "vm_app_version"
|
||||
)
|
||||
|
||||
// Ctx defines filtering context
|
||||
type Ctx struct {
|
||||
// labels hold modified timeseries labels
|
||||
// valid until PutContext call
|
||||
labels []prompb.Label
|
||||
|
||||
buf []byte
|
||||
hasVMAppLabel bool
|
||||
hasVMAppVersionLabel bool
|
||||
hasFilterLabelValue bool
|
||||
jobLabelValue string
|
||||
instanceLabelValue string
|
||||
}
|
||||
|
||||
func (ctx *Ctx) reset() {
|
||||
// do not reset labels intentionally
|
||||
// it must live until PutContext call
|
||||
|
||||
ctx.buf = ctx.buf[:0]
|
||||
ctx.hasVMAppLabel = false
|
||||
ctx.hasVMAppVersionLabel = false
|
||||
ctx.hasFilterLabelValue = false
|
||||
ctx.jobLabelValue = ""
|
||||
ctx.instanceLabelValue = ""
|
||||
}
|
||||
|
||||
var ctxPool = &sync.Pool{
|
||||
New: func() any {
|
||||
return &Ctx{}
|
||||
},
|
||||
}
|
||||
|
||||
// GetContext returns filtering context
|
||||
func GetContext() *Ctx {
|
||||
return ctxPool.Get().(*Ctx)
|
||||
}
|
||||
|
||||
// PutContext resets context
|
||||
func PutContext(ctx *Ctx) {
|
||||
clear(ctx.labels)
|
||||
ctx.labels = ctx.labels[:0]
|
||||
ctx.reset()
|
||||
ctxPool.Put(ctx)
|
||||
}
|
||||
|
||||
// Filter manages the list of VictoriaMetrics instances grouped by job:instance labels.
|
||||
// job and instance must present at timeseries.
|
||||
//
|
||||
// Filter keeps timeseries with any of the following conditions:
|
||||
// * vm_app_version present
|
||||
// * victoriametrics_app=true label present at timeseries
|
||||
// * if labels has label value defined with flag `-mdx.label`
|
||||
//
|
||||
// Filter track entries with TTL of 1 hour
|
||||
type Filter struct {
|
||||
tracker *instanceTracker
|
||||
filterByLabelName string
|
||||
label string
|
||||
}
|
||||
|
||||
// NewFilter returns new Filter instance
|
||||
func NewFilter() *Filter {
|
||||
filter := &Filter{
|
||||
tracker: newInstanceTracker(),
|
||||
}
|
||||
if len(*vmLabel) > 0 {
|
||||
n := strings.IndexByte(*vmLabel, '=')
|
||||
if n < 0 {
|
||||
logger.Fatalf("missing '=' in `-mdx.label`. It must contain label in the form `name=value`; got %q", *vmLabel)
|
||||
}
|
||||
filter.filterByLabelName = (*vmLabel)[:n]
|
||||
filter.label = (*vmLabel)[n+1:]
|
||||
if len(filter.filterByLabelName) == 0 || len(filter.label) == 0 {
|
||||
logger.Fatalf("label name and value cannot be empty in `-mdx.label`. It must contain label in the form `name=value`; got %q", *vmLabel)
|
||||
}
|
||||
}
|
||||
|
||||
return filter
|
||||
}
|
||||
|
||||
// VMInstancesCount returns amount of currently tracked instances
|
||||
func (filter *Filter) VMInstancesCount() int {
|
||||
return filter.tracker.len()
|
||||
}
|
||||
|
||||
// MustStop stops filter instance
|
||||
func (filter *Filter) MustStop() {
|
||||
filter.tracker.mustStop()
|
||||
}
|
||||
|
||||
// Filter filters provided timeseries with given context.
|
||||
//
|
||||
// Returned timeseries is valid as long as Ctx is valid
|
||||
func (filter *Filter) Filter(ctx *Ctx, tss []prompb.TimeSeries) []prompb.TimeSeries {
|
||||
dstTss := tss[:0]
|
||||
for _, ts := range tss {
|
||||
ctx.prepare(ts.Labels, filter.filterByLabelName, filter.label)
|
||||
key := ctx.formatTimeSeriesKey()
|
||||
if len(key) == 0 {
|
||||
// metrics with empty job or instance labels must be always dropped
|
||||
// despite any other conditions
|
||||
continue
|
||||
}
|
||||
if ctx.hasVMAppLabel {
|
||||
filter.trackInstance(key)
|
||||
dstTss = append(dstTss, ts)
|
||||
continue
|
||||
}
|
||||
if ctx.hasFilterLabelValue || ctx.hasVMAppVersionLabel {
|
||||
ts.Labels = ctx.addVMAppLabel(ts.Labels)
|
||||
filter.trackInstance(key)
|
||||
dstTss = append(dstTss, ts)
|
||||
continue
|
||||
}
|
||||
ok := filter.tracker.has(key)
|
||||
if ok {
|
||||
ts.Labels = ctx.addVMAppLabel(ts.Labels)
|
||||
dstTss = append(dstTss, ts)
|
||||
}
|
||||
}
|
||||
return dstTss
|
||||
}
|
||||
|
||||
func (filter *Filter) trackInstance(key string) {
|
||||
if filter.tracker.has(key) {
|
||||
return
|
||||
}
|
||||
key = strings.Clone(key)
|
||||
filter.tracker.register(key)
|
||||
}
|
||||
|
||||
func (ctx *Ctx) prepare(labels []prompb.Label, filterByLabelName, label string) {
|
||||
ctx.reset()
|
||||
|
||||
// always use the last label=value pair
|
||||
// because in case of possible label duplicates,
|
||||
// the last added label must win
|
||||
for _, l := range labels {
|
||||
switch l.Name {
|
||||
case "job":
|
||||
ctx.jobLabelValue = l.Value
|
||||
case "instance":
|
||||
ctx.instanceLabelValue = l.Value
|
||||
case vmAppLabelName:
|
||||
if l.Value == vmAppLabelValue {
|
||||
ctx.hasVMAppLabel = true
|
||||
}
|
||||
case "__name__":
|
||||
if l.Value == vmAppVersionMetricName {
|
||||
ctx.hasVMAppVersionLabel = true
|
||||
}
|
||||
}
|
||||
if len(filterByLabelName) > 0 {
|
||||
if l.Name == filterByLabelName && l.Value == label {
|
||||
ctx.hasFilterLabelValue = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// formatTimeSeriesKey returns timeseries key after ctx.prepare call
|
||||
// if it catched job and instances labels
|
||||
//
|
||||
// returned string is valid until next ctx.prepare
|
||||
func (ctx *Ctx) formatTimeSeriesKey() string {
|
||||
if len(ctx.jobLabelValue) == 0 || len(ctx.instanceLabelValue) == 0 {
|
||||
return ""
|
||||
}
|
||||
buf := ctx.buf[:0]
|
||||
buf = strconv.AppendQuote(buf, ctx.jobLabelValue)
|
||||
buf = append(buf, ':')
|
||||
buf = strconv.AppendQuote(buf, ctx.instanceLabelValue)
|
||||
ctx.buf = buf
|
||||
return bytesutil.ToUnsafeString(buf)
|
||||
}
|
||||
|
||||
func (ctx *Ctx) addVMAppLabel(labels []prompb.Label) []prompb.Label {
|
||||
// unconditionally add vmAppLabelValue at the end of labels list
|
||||
// it will overwrite any exist vmAppLabelName labels with a value different to vmAppLabelValue
|
||||
// it's guaranteed by VictoriaMetrics ingestion contract
|
||||
poolLabels := ctx.labels
|
||||
poolLabelsLen := len(poolLabels)
|
||||
poolLabels = append(poolLabels, labels...)
|
||||
poolLabels = append(poolLabels, prompb.Label{Name: vmAppLabelName, Value: vmAppLabelValue})
|
||||
ctx.labels = poolLabels
|
||||
return poolLabels[poolLabelsLen:len(poolLabels):len(poolLabels)]
|
||||
}
|
||||
|
||||
// instanceTracker remembers the last access time for every registered key
// and drops the keys which aren't accessed for entryTTLSeconds.
type instanceTracker struct {
	// mu protects lastAccessByKey.
	mu sync.RWMutex

	// lastAccessByKey maps a key to the unix timestamp in seconds of its last access.
	lastAccessByKey map[string]*atomic.Uint64

	// wg waits for the goroutine running startStaleWatcher.
	wg sync.WaitGroup

	// stop is closed by mustStop in order to stop startStaleWatcher.
	stop chan struct{}
}
|
||||
|
||||
func newInstanceTracker() *instanceTracker {
|
||||
c := &instanceTracker{
|
||||
lastAccessByKey: make(map[string]*atomic.Uint64),
|
||||
stop: make(chan struct{}),
|
||||
}
|
||||
c.wg.Add(1)
|
||||
go c.startStaleWatcher()
|
||||
return c
|
||||
}
|
||||
|
||||
func (it *instanceTracker) len() int {
|
||||
it.mu.RLock()
|
||||
s := len(it.lastAccessByKey)
|
||||
it.mu.RUnlock()
|
||||
return s
|
||||
}
|
||||
|
||||
func (it *instanceTracker) has(key string) bool {
|
||||
it.mu.RLock()
|
||||
lat, ok := it.lastAccessByKey[key]
|
||||
it.mu.RUnlock()
|
||||
if ok {
|
||||
lat.Store(fasttime.UnixTimestamp())
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (it *instanceTracker) register(key string) {
|
||||
it.mu.Lock()
|
||||
// key could be registered by concurrent goroutine
|
||||
lat, ok := it.lastAccessByKey[key]
|
||||
if !ok {
|
||||
lat = &atomic.Uint64{}
|
||||
it.lastAccessByKey[key] = lat
|
||||
}
|
||||
it.mu.Unlock()
|
||||
lat.Store(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
func (it *instanceTracker) startStaleWatcher() {
|
||||
defer it.wg.Done()
|
||||
|
||||
t := time.NewTicker(time.Minute)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-it.stop:
|
||||
return
|
||||
case <-t.C:
|
||||
it.cleanStale()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// entryTTLSeconds is the number of seconds a tracked entry is kept
// without being accessed before cleanStale removes it.
var entryTTLSeconds = uint64(time.Hour / time.Second)
|
||||
|
||||
func (it *instanceTracker) cleanStale() {
|
||||
ct := fasttime.UnixTimestamp()
|
||||
var toDelete map[string]*atomic.Uint64
|
||||
|
||||
it.mu.RLock()
|
||||
for key, lastAccessTime := range it.lastAccessByKey {
|
||||
accessedAt := lastAccessTime.Load()
|
||||
if ct > accessedAt+entryTTLSeconds {
|
||||
if toDelete == nil {
|
||||
toDelete = make(map[string]*atomic.Uint64)
|
||||
}
|
||||
toDelete[key] = lastAccessTime
|
||||
}
|
||||
}
|
||||
it.mu.RUnlock()
|
||||
|
||||
if len(toDelete) > 0 {
|
||||
it.mu.Lock()
|
||||
for key, lastAccessTime := range toDelete {
|
||||
accessedAt := lastAccessTime.Load()
|
||||
// concurrent goroutine may refresh lastAccessTime
|
||||
if ct > accessedAt+entryTTLSeconds {
|
||||
delete(it.lastAccessByKey, key)
|
||||
}
|
||||
}
|
||||
it.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (it *instanceTracker) mustStop() {
|
||||
close(it.stop)
|
||||
it.wg.Wait()
|
||||
}
|
||||
104
lib/mdx/filter_synctest_test.go
Normal file
104
lib/mdx/filter_synctest_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
//go:build synctest
|
||||
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
func TestMdxInstanceCleanup(t *testing.T) {
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
assertFilterLen := func(expectedLen int) {
|
||||
t.Helper()
|
||||
if filter.VMInstancesCount() != expectedLen {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", filter.VMInstancesCount(), expectedLen)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := GetContext()
|
||||
filter.Filter(ctx, []prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "scrape_targets_up"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
PutContext(ctx)
|
||||
|
||||
time.Sleep(1 * time.Minute)
|
||||
// the entries should not be cleaned.
|
||||
assertFilterLen(2)
|
||||
|
||||
time.Sleep(58 * time.Minute)
|
||||
// receive samples from victoria-metrics1:8428 after 59 minutes.
|
||||
// so the entry will be refreshed.
|
||||
ctx = GetContext()
|
||||
filter.Filter(ctx, []prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
PutContext(ctx)
|
||||
assertFilterLen(2)
|
||||
|
||||
// entry for job:instance - test:vmagent1:8429 must be removed
|
||||
time.Sleep(4 * time.Minute)
|
||||
assertFilterLen(1)
|
||||
|
||||
// no samples from vmagent1:8429 in the last hour, so it should be removed from the mdx instance list.
|
||||
time.Sleep(2 * time.Hour)
|
||||
|
||||
assertFilterLen(0)
|
||||
})
|
||||
|
||||
}
|
||||
435
lib/mdx/filter_test.go
Normal file
435
lib/mdx/filter_test.go
Normal file
@@ -0,0 +1,435 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
func TestMdxInstanceFilter(t *testing.T) {
|
||||
originalVmLabel := *vmLabel
|
||||
*vmLabel = "service=victoriametrics"
|
||||
t.Cleanup(func() {
|
||||
*vmLabel = originalVmLabel
|
||||
})
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries) {
|
||||
t.Helper()
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
|
||||
ctx := GetContext()
|
||||
defer PutContext(ctx)
|
||||
inputCopy := append([]prompb.TimeSeries{}, input...)
|
||||
output := filter.Filter(ctx, inputCopy)
|
||||
if diff := cmp.Diff(expectedOutput, output); len(diff) > 0 {
|
||||
t.Fatalf("unexpected result (-want, +got):\n%s", diff)
|
||||
}
|
||||
// make sure that result is the same over multiple calls
|
||||
inputCopy = append([]prompb.TimeSeries{}, input...)
|
||||
output = filter.Filter(ctx, inputCopy)
|
||||
if diff := cmp.Diff(expectedOutput, output); len(diff) > 0 {
|
||||
t.Fatalf("unexpected result (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
}
|
||||
// metrics with vm_app_version and different order of labels.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics3:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics3:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
}},
|
||||
)
|
||||
// metrics without vm_app_version but with service=victoriametrics that is specified in `-mdx.label`.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
})
|
||||
// metrics without vm_app_version but with service=victoriametrics that is specified in `-mdx.label`.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
}},
|
||||
[]prompb.TimeSeries{
|
||||
// 2.
|
||||
// metrics without vm_app_version but with service=victoriametrics that is specified in `-mdx.label`.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
}})
|
||||
|
||||
// metrics with vm_app_version and service=victoriametrics should be preserved.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
// metrics without vm_app_version and `service=victoriametrics` but with `victoriametrics_app=true`, which should be preserved.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics6:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics6:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// metrics without vm_app_version and service=victoriametrics and `victoriametrics_app=true`, which should be filtered out.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{},
|
||||
)
|
||||
|
||||
// metrics with vm_app_version but job or instance is empty (or missing), they should be dropped.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: ""},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent2:8429"},
|
||||
{Name: "job", Value: ""},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent2:8429"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{})
|
||||
|
||||
// metrics without vm_app_version, but the instances were already registered with first timeseries
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_rows_inserted_total"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_rows_inserted_total"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// metrics without vm_app_version, `service=victoriametrics` and `victoriametrics_app=true`, and the instance wasn't already registered in the previous call, so it will be dropped.
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics7:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
}},
|
||||
)
|
||||
|
||||
// metrics with duplicate victoriametrics_app label
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "other_value"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "other_value"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// metrics with duplicate job and instance labels
|
||||
// last value wins
|
||||
f([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "job", Value: "test2"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_requests_total"},
|
||||
{Name: "job", Value: "test2"},
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "job", Value: "test2"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_requests_total"},
|
||||
{Name: "job", Value: "test2"},
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
}})
|
||||
|
||||
}
|
||||
|
||||
func TestMdxInstanceFilterConcurrent(t *testing.T) {
|
||||
originalVmLabel := *vmLabel
|
||||
*vmLabel = "service=victoriametrics"
|
||||
t.Cleanup(func() { *vmLabel = originalVmLabel })
|
||||
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
|
||||
const concurrency = 8
|
||||
const iterations = 200
|
||||
|
||||
generateSeries := func(g int) []prompb.TimeSeries {
|
||||
return []prompb.TimeSeries{
|
||||
{Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: fmt.Sprintf("vm-%d:8428", g)},
|
||||
}},
|
||||
// shared job:instance
|
||||
{Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "vmagent:8428"},
|
||||
}},
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for worker := range concurrency {
|
||||
wg.Go(func() {
|
||||
input := generateSeries(worker)
|
||||
var expectedOutput []prompb.TimeSeries
|
||||
for _, inputTs := range input {
|
||||
labels := append([]prompb.Label{}, inputTs.Labels...)
|
||||
labels = append(labels, prompb.Label{Name: vmAppLabelName, Value: vmAppLabelValue})
|
||||
expectedOutput = append(expectedOutput, prompb.TimeSeries{Labels: labels})
|
||||
}
|
||||
for range iterations {
|
||||
ctx := GetContext()
|
||||
inputCopy := append([]prompb.TimeSeries{}, input...)
|
||||
output := filter.Filter(ctx, inputCopy)
|
||||
if diff := cmp.Diff(expectedOutput, output); len(diff) > 0 {
|
||||
t.Errorf("unexpected result (-want, +got):\n%s", diff)
|
||||
}
|
||||
PutContext(ctx)
|
||||
|
||||
}
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// goroutines + 1 shared
|
||||
if got := filter.VMInstancesCount(); got != concurrency+1 {
|
||||
t.Errorf("unexpected instance count: got %d, want %d", got, concurrency+1)
|
||||
}
|
||||
}
|
||||
85
lib/mdx/filter_timing_test.go
Normal file
85
lib/mdx/filter_timing_test.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
func BenchmarkFilter(b *testing.B) {
|
||||
f := func(name string, input, want []prompb.TimeSeries) {
|
||||
b.Helper()
|
||||
|
||||
b.Run(name, func(b *testing.B) {
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
ctx := GetContext()
|
||||
localInput := append([]prompb.TimeSeries{}, input...)
|
||||
tss := filter.Filter(ctx, localInput)
|
||||
if len(tss) != len(want) {
|
||||
diff := cmp.Diff(want, tss)
|
||||
b.Fatalf("unexpected result (-want, +got):\n%s", diff)
|
||||
}
|
||||
PutContext(ctx)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
input := []prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "http_requests_total"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "http_requests_errors_total"},
|
||||
},
|
||||
},
|
||||
}
|
||||
expected := []prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "http_requests_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "http_requests_errors_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
}
|
||||
f("match vm_app_version", input, expected)
|
||||
}
|
||||
@@ -26,6 +26,8 @@ type FastQueue struct {
|
||||
// isPQDisabled is set to true when pq is disabled.
|
||||
isPQDisabled bool
|
||||
|
||||
prioritizeInMemoryData bool
|
||||
|
||||
// pq is file-based queue
|
||||
pq *queue
|
||||
|
||||
@@ -39,6 +41,31 @@ type FastQueue struct {
|
||||
stopDeadline uint64
|
||||
}
|
||||
|
||||
// OpenFastQueueOpts defines options for opening FastQueue via MustOpenFastQueueWithOpts.
|
||||
type OpenFastQueueOpts struct {
|
||||
// MaxInmemoryBlocks is the number of blocks to hold in memory before falling back to file-based persistence.
|
||||
MaxInmemoryBlocks int
|
||||
// MaxPendingBytes limits the size of the file-based part of the queue.
|
||||
// If MaxPendingBytes is 0, then the queue size is unlimited.
|
||||
// The oldest data is dropped when the queue
|
||||
// reaches MaxPendingBytes.
|
||||
MaxPendingBytes int64
|
||||
// IsPQDisabled disables the file-based queue when it is set to true.
|
||||
// If it is set to true, then write requests that exceed in-memory buffer capacity are rejected.
|
||||
// The in-memory part of the queue can still be stored on disk during graceful shutdown.
|
||||
IsPQDisabled bool
|
||||
// PrioritizeInmemoryData instructs FastQueue to write data into the in-memory queue
|
||||
// even if the file-based queue is not empty.
|
||||
// This is useful when data order doesn't matter and getting the most recent data
|
||||
// as fast as possible is more important.
|
||||
PrioritizeInmemoryData bool
|
||||
}
|
||||
|
||||
// MustOpenFastQueueWithOpts opens persistent queue with the given name at the given path using the given opts.
|
||||
func MustOpenFastQueueWithOpts(path, name string, opts OpenFastQueueOpts) *FastQueue {
|
||||
return mustOpenFastQueue(path, name, opts)
|
||||
}
|
||||
|
||||
// MustOpenFastQueue opens persistent queue at the given path.
|
||||
//
|
||||
// It holds up to maxInmemoryBlocks in memory before falling back to file-based persistence.
|
||||
@@ -49,11 +76,22 @@ type FastQueue struct {
|
||||
// if isPQDisabled is set to true, then write requests that exceed in-memory buffer capacity are rejected.
|
||||
// in-memory queue part can be stored on disk during graceful shutdown.
|
||||
func MustOpenFastQueue(path, name string, maxInmemoryBlocks int, maxPendingBytes int64, isPQDisabled bool) *FastQueue {
|
||||
opts := OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: maxInmemoryBlocks,
|
||||
MaxPendingBytes: maxPendingBytes,
|
||||
IsPQDisabled: isPQDisabled,
|
||||
}
|
||||
return mustOpenFastQueue(path, name, opts)
|
||||
}
|
||||
func mustOpenFastQueue(path, name string, opts OpenFastQueueOpts) *FastQueue {
|
||||
maxPendingBytes := opts.MaxPendingBytes
|
||||
isPQDisabled := opts.IsPQDisabled
|
||||
pq := mustOpen(path, name, maxPendingBytes)
|
||||
fq := &FastQueue{
|
||||
pq: pq,
|
||||
isPQDisabled: isPQDisabled,
|
||||
ch: make(chan *bytesutil.ByteBuffer, maxInmemoryBlocks),
|
||||
pq: pq,
|
||||
isPQDisabled: isPQDisabled,
|
||||
prioritizeInMemoryData: opts.PrioritizeInmemoryData,
|
||||
ch: make(chan *bytesutil.ByteBuffer, opts.MaxInmemoryBlocks),
|
||||
}
|
||||
fq.cond.L = &fq.mu
|
||||
fq.lastInmemoryBlockReadTime = fasttime.UnixTimestamp()
|
||||
@@ -81,7 +119,7 @@ func MustOpenFastQueue(path, name string, maxInmemoryBlocks int, maxPendingBytes
|
||||
if isPQDisabled {
|
||||
persistenceStatus = "disabled"
|
||||
}
|
||||
logger.Infof("opened fast queue at %q with maxInmemoryBlocks=%d, it contains %d pending bytes, persistence is %s", path, maxInmemoryBlocks, pendingBytes, persistenceStatus)
|
||||
logger.Infof("opened fast queue at %q with maxInmemoryBlocks=%d, it contains %d pending bytes, persistence is %s", path, opts.MaxInmemoryBlocks, pendingBytes, persistenceStatus)
|
||||
return fq
|
||||
}
|
||||
|
||||
@@ -97,7 +135,7 @@ func (fq *FastQueue) IsWriteBlocked() bool {
|
||||
}
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
return len(fq.ch) == cap(fq.ch) || fq.pq.GetPendingBytes() > 0
|
||||
return len(fq.ch) == cap(fq.ch) || (fq.pq.GetPendingBytes() > 0 && !fq.prioritizeInMemoryData)
|
||||
}
|
||||
|
||||
// UnblockAllReaders unblocks all the readers.
|
||||
@@ -193,19 +231,24 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
isPQWriteAllowed := !fq.isPQDisabled || ignoreDisabledPQ
|
||||
|
||||
fq.flushInmemoryBlocksToFileIfNeededLocked()
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
// The file-based queue isn't drained yet. This means that in-memory queue cannot be used yet.
|
||||
// So put the block to file-based queue.
|
||||
if len(fq.ch) > 0 {
|
||||
logger.Panicf("BUG: the in-memory queue must be empty when the file-based queue is non-empty; it contains %d pending bytes", n)
|
||||
if !isPQWriteAllowed && fq.pq.GetPendingBytes() > 0 {
|
||||
// fast path: there is pending data in the file-based queue,
|
||||
// it must be drained before the in-memory queue can be used.
|
||||
// File-based queue could be non-empty after vmagent restart
|
||||
// and vmagent couldn't flush in-memory queue during shutdown.
|
||||
return false
|
||||
}
|
||||
if !fq.prioritizeInMemoryData {
|
||||
fq.flushInmemoryBlocksToFileIfNeededLocked()
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
// The file-based queue isn't drained yet. This means that in-memory queue cannot be used yet.
|
||||
// So put the block to file-based queue.
|
||||
if len(fq.ch) > 0 {
|
||||
logger.Panicf("BUG: the in-memory queue must be empty when the file-based queue is non-empty; it contains %d pending bytes", n)
|
||||
}
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
if !isPQWriteAllowed {
|
||||
return false
|
||||
}
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
if len(fq.ch) == cap(fq.ch) {
|
||||
// There is no space left in the in-memory queue. Put the data to file-based queue.
|
||||
@@ -216,7 +259,7 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
// Fast path - put the block to in-memory queue.
|
||||
|
||||
bb := blockBufPool.Get()
|
||||
bb.B = append(bb.B[:0], block...)
|
||||
fq.ch <- bb
|
||||
@@ -229,12 +272,41 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
}
|
||||
|
||||
// MustReadBlock reads the next block from fq into dst and returns it.
|
||||
// It first reads from the in-memory queue, then checks file-based queue.
|
||||
// It first reads from the file-based queue, then checks in-memory queue.
|
||||
// It blocks until a block is available or the stop deadline is exceeded, in which case it returns (dst, false).
|
||||
func (fq *FastQueue) MustReadBlock(dst []byte) ([]byte, bool) {
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
for {
|
||||
if fq.stopDeadline > 0 && fasttime.UnixTimestamp() > fq.stopDeadline {
|
||||
return dst, false
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
data, ok := fq.pq.MustReadBlockNonblocking(dst)
|
||||
if ok {
|
||||
return data, true
|
||||
}
|
||||
dst = data
|
||||
}
|
||||
if len(fq.ch) > 0 {
|
||||
return fq.mustReadInMemoryBlockLocked(dst), true
|
||||
}
|
||||
if fq.stopDeadline > 0 {
|
||||
return dst, false
|
||||
}
|
||||
// There are no blocks. Wait for new block.
|
||||
fq.pq.ResetIfEmpty()
|
||||
fq.cond.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
// MustReadInMemoryBlockBlocking reads the next block from the in-memory queue into dst and returns it.
|
||||
// It blocks until a block is available or the stop deadline is exceeded, in which case it returns (dst, false).
|
||||
func (fq *FastQueue) MustReadInMemoryBlockBlocking(dst []byte) ([]byte, bool) {
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
for {
|
||||
if fq.stopDeadline > 0 && fasttime.UnixTimestamp() > fq.stopDeadline {
|
||||
return dst, false
|
||||
@@ -242,19 +314,10 @@ func (fq *FastQueue) MustReadBlock(dst []byte) ([]byte, bool) {
|
||||
if len(fq.ch) > 0 {
|
||||
return fq.mustReadInMemoryBlockLocked(dst), true
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
data, ok := fq.pq.MustReadBlockNonblocking(dst)
|
||||
if ok {
|
||||
return data, true
|
||||
}
|
||||
dst = data
|
||||
continue
|
||||
}
|
||||
if fq.stopDeadline > 0 {
|
||||
return dst, false
|
||||
}
|
||||
// There are no blocks. Wait for new block.
|
||||
fq.pq.ResetIfEmpty()
|
||||
fq.cond.Wait()
|
||||
}
|
||||
}
|
||||
@@ -277,9 +340,6 @@ func (fq *FastQueue) mustReadInMemoryBlockLocked(dst []byte) []byte {
|
||||
if len(fq.ch) == 0 {
|
||||
logger.Panicf("BUG: the function must not be called when in-memory queue is empty. Caller should verify the queue len upfront")
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
logger.Panicf("BUG: the file-based queue must be empty when the in-memory queue is non-empty; it contains %d pending bytes", n)
|
||||
}
|
||||
bb := <-fq.ch
|
||||
fq.pendingInmemoryBytes -= uint64(len(bb.B))
|
||||
fq.lastInmemoryBlockReadTime = fasttime.UnixTimestamp()
|
||||
|
||||
@@ -364,3 +364,64 @@ func TestFastQueueWriteReadWithIgnoreDisabledPQ(t *testing.T) {
|
||||
fq.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
func TestFastQueueWriteReadWithPrioritizeInmemory(t *testing.T) {
|
||||
path := "fast-queue-write-read-inmemory-disabled-pq-force-write"
|
||||
fs.MustRemoveDir(path)
|
||||
|
||||
capacity := 20
|
||||
opts := OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: capacity,
|
||||
PrioritizeInmemoryData: true,
|
||||
}
|
||||
fq := MustOpenFastQueueWithOpts(path, "foobar", opts)
|
||||
if n := fq.GetInmemoryQueueLen(); n != 0 {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d", n)
|
||||
}
|
||||
var blocks []string
|
||||
for i := range capacity {
|
||||
block := fmt.Sprintf("block %d", i)
|
||||
if !fq.TryWriteBlock([]byte(block)) {
|
||||
t.Fatalf("TryWriteBlock must return true in this context")
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
if n := fq.GetInmemoryQueueLen(); n != capacity {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d: %d", n, capacity)
|
||||
}
|
||||
for i := range capacity {
|
||||
block := fmt.Sprintf("block %d-%d", i, i)
|
||||
if !fq.TryWriteBlock([]byte(block)) {
|
||||
t.Fatalf("TryWriteBlock must return true in this context")
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
|
||||
// when the capacity is exceeded, the last element is written into the file-based queue
|
||||
if n := fq.GetInmemoryQueueLen(); n != capacity-1 {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d: %d", n, capacity)
|
||||
}
|
||||
|
||||
// make sure that recently ingested elements are returned first
|
||||
for idx := capacity + 1; idx < capacity*2; idx++ {
|
||||
buf, ok := fq.MustReadInMemoryBlockBlocking(nil)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected ok=false")
|
||||
}
|
||||
if string(buf) != blocks[idx] {
|
||||
t.Fatalf("unexpected block read; got %q; want %q: %d", buf, blocks[idx], idx)
|
||||
}
|
||||
}
|
||||
blocks = blocks[:capacity+1]
|
||||
for _, block := range blocks {
|
||||
buf, ok := fq.MustReadBlock(nil)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected ok=false")
|
||||
}
|
||||
if string(buf) != block {
|
||||
t.Fatalf("unexpected block read; got %q; want %q", buf, block)
|
||||
}
|
||||
}
|
||||
fq.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
@@ -112,11 +112,19 @@ func (m *TimeSeries) size() (n int) {
|
||||
}
|
||||
for _, e := range m.Labels {
|
||||
l := e.size()
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
for _, e := range m.Samples {
|
||||
l := e.size()
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
@@ -126,10 +134,18 @@ func (m *Label) size() (n int) {
|
||||
return 0
|
||||
}
|
||||
if l := len(m.Name); l > 0 {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
if l := len(m.Value); l > 0 {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
@@ -160,6 +176,11 @@ func (m *WriteRequest) marshalToSizedBuffer(dst []byte) (int, error) {
|
||||
}
|
||||
|
||||
func encodeVarint(dst []byte, offset int, v uint64) int {
|
||||
if v < 1<<7 {
|
||||
offset--
|
||||
dst[offset] = byte(v)
|
||||
return offset
|
||||
}
|
||||
offset -= sov(v)
|
||||
base := offset
|
||||
for v >= 1<<7 {
|
||||
@@ -180,7 +201,11 @@ func (m *WriteRequest) size() (n int) {
|
||||
}
|
||||
for _, e := range m.Metadata {
|
||||
l := e.size()
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
@@ -238,13 +263,25 @@ func (m *MetricMetadata) size() (n int) {
|
||||
n += 1 + sov(uint64(m.Type))
|
||||
}
|
||||
if l := len(m.MetricFamilyName); l > 0 {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
if l := len(m.Help); l > 0 {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
if l := len(m.Unit); l > 0 {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
if l < 128 {
|
||||
n += 2 + l
|
||||
} else {
|
||||
n += 1 + l + sov(uint64(l))
|
||||
}
|
||||
}
|
||||
if m.AccountID != 0 {
|
||||
n += 1 + sov(uint64(m.AccountID))
|
||||
|
||||
@@ -36,10 +36,12 @@ function submitRelabelDebugForm(e) {
|
||||
<div class="container-fluid">
|
||||
<a href="https://docs.victoriametrics.com/victoriametrics/relabeling/" target="_blank">Relabeling docs</a>{% space %}
|
||||
|
||||
{% if targetURL != "" %}
|
||||
<a href="metric-relabel-debug{% if targetID != "" %}?id={%s targetID %}{% endif %}">Metric relabel debug</a>
|
||||
{% else %}
|
||||
<a href="target-relabel-debug{% if targetID != "" %}?id={%s targetID %}{% endif %}">Target relabel debug</a>
|
||||
{% if targetID != "" %}
|
||||
{% if targetURL != "" %}
|
||||
<a href="metric-relabel-debug?id={%s targetID %}">Metric relabel debug</a>
|
||||
{% else %}
|
||||
<a href="target-relabel-debug?id={%s targetID %}">Target relabel debug</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
<br>
|
||||
|
||||
@@ -80,425 +80,417 @@ func StreamRelabelDebugStepsHTML(qw422016 *qt422016.Writer, targetURL, targetID
|
||||
//line lib/promrelabel/debug.qtpl:37
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:39
|
||||
if targetURL != "" {
|
||||
//line lib/promrelabel/debug.qtpl:39
|
||||
qw422016.N().S(`<a href="metric-relabel-debug`)
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:40
|
||||
if targetID != "" {
|
||||
if targetURL != "" {
|
||||
//line lib/promrelabel/debug.qtpl:40
|
||||
qw422016.N().S(`?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:40
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:40
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:40
|
||||
qw422016.N().S(`">Metric relabel debug</a>`)
|
||||
qw422016.N().S(`<a href="metric-relabel-debug?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:41
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:41
|
||||
qw422016.N().S(`<a href="target-relabel-debug`)
|
||||
//line lib/promrelabel/debug.qtpl:42
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:42
|
||||
qw422016.N().S(`?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:42
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:41
|
||||
qw422016.N().S(`">Metric relabel debug</a>`)
|
||||
//line lib/promrelabel/debug.qtpl:42
|
||||
}
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:42
|
||||
qw422016.N().S(`">Target relabel debug</a>`)
|
||||
qw422016.N().S(`<a href="target-relabel-debug?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:43
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:43
|
||||
qw422016.N().S(`">Target relabel debug</a>`)
|
||||
//line lib/promrelabel/debug.qtpl:44
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:45
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:43
|
||||
//line lib/promrelabel/debug.qtpl:45
|
||||
qw422016.N().S(`<br>`)
|
||||
//line lib/promrelabel/debug.qtpl:46
|
||||
//line lib/promrelabel/debug.qtpl:48
|
||||
if err != nil {
|
||||
//line lib/promrelabel/debug.qtpl:47
|
||||
//line lib/promrelabel/debug.qtpl:49
|
||||
htmlcomponents.StreamErrorNotification(qw422016, err)
|
||||
//line lib/promrelabel/debug.qtpl:48
|
||||
//line lib/promrelabel/debug.qtpl:50
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:48
|
||||
//line lib/promrelabel/debug.qtpl:50
|
||||
qw422016.N().S(`<div class="m-3"><form method="POST" onsubmit="submitRelabelDebugForm(event)">`)
|
||||
//line lib/promrelabel/debug.qtpl:52
|
||||
//line lib/promrelabel/debug.qtpl:54
|
||||
streamrelabelDebugFormInputs(qw422016, metric, relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:53
|
||||
//line lib/promrelabel/debug.qtpl:55
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:53
|
||||
//line lib/promrelabel/debug.qtpl:55
|
||||
qw422016.N().S(`<input type="hidden" name="id" value="`)
|
||||
//line lib/promrelabel/debug.qtpl:54
|
||||
//line lib/promrelabel/debug.qtpl:56
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:54
|
||||
//line lib/promrelabel/debug.qtpl:56
|
||||
qw422016.N().S(`" />`)
|
||||
//line lib/promrelabel/debug.qtpl:55
|
||||
//line lib/promrelabel/debug.qtpl:57
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:55
|
||||
//line lib/promrelabel/debug.qtpl:57
|
||||
qw422016.N().S(`<input type="submit" value="Submit" class="btn btn-primary m-1" />`)
|
||||
//line lib/promrelabel/debug.qtpl:57
|
||||
//line lib/promrelabel/debug.qtpl:59
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:57
|
||||
//line lib/promrelabel/debug.qtpl:59
|
||||
qw422016.N().S(`<button type="button" onclick="location.href='?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:58
|
||||
//line lib/promrelabel/debug.qtpl:60
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:58
|
||||
//line lib/promrelabel/debug.qtpl:60
|
||||
qw422016.N().S(`'" class="btn btn-secondary m-1">Reset</button>`)
|
||||
//line lib/promrelabel/debug.qtpl:59
|
||||
//line lib/promrelabel/debug.qtpl:61
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:59
|
||||
//line lib/promrelabel/debug.qtpl:61
|
||||
qw422016.N().S(`</form></div><div class="row"><main class="col-12">`)
|
||||
//line lib/promrelabel/debug.qtpl:65
|
||||
//line lib/promrelabel/debug.qtpl:67
|
||||
streamrelabelDebugSteps(qw422016, dss, targetURL, targetID)
|
||||
//line lib/promrelabel/debug.qtpl:65
|
||||
//line lib/promrelabel/debug.qtpl:67
|
||||
qw422016.N().S(`</main></div></div></body></html>`)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
func WriteRelabelDebugStepsHTML(qq422016 qtio422016.Writer, targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) {
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
StreamRelabelDebugStepsHTML(qw422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
func RelabelDebugStepsHTML(targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) string {
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
WriteRelabelDebugStepsHTML(qb422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:71
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
//line lib/promrelabel/debug.qtpl:75
|
||||
func streamrelabelDebugFormInputs(qw422016 *qt422016.Writer, metric, relabelConfigs string) {
|
||||
//line lib/promrelabel/debug.qtpl:73
|
||||
//line lib/promrelabel/debug.qtpl:75
|
||||
qw422016.N().S(`<div>Relabel configs:<br/><textarea name="relabel_configs" style="width: 100%; height: 15em; font-family: monospace" class="m-1">`)
|
||||
//line lib/promrelabel/debug.qtpl:76
|
||||
//line lib/promrelabel/debug.qtpl:78
|
||||
qw422016.E().S(relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:76
|
||||
//line lib/promrelabel/debug.qtpl:78
|
||||
qw422016.N().S(`</textarea></div><div>Labels:<br/><textarea name="metric" style="width: 100%; height: 5em; font-family: monospace" class="m-1">`)
|
||||
//line lib/promrelabel/debug.qtpl:81
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qw422016.E().S(metric)
|
||||
//line lib/promrelabel/debug.qtpl:81
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qw422016.N().S(`</textarea></div>`)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
func writerelabelDebugFormInputs(qq422016 qtio422016.Writer, metric, relabelConfigs string) {
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
streamrelabelDebugFormInputs(qw422016, metric, relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
func relabelDebugFormInputs(metric, relabelConfigs string) string {
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
writerelabelDebugFormInputs(qb422016, metric, relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:83
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
func writerelabelDebugFormInputs(qq422016 qtio422016.Writer, metric, relabelConfigs string) {
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
streamrelabelDebugFormInputs(qw422016, metric, relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
func relabelDebugFormInputs(metric, relabelConfigs string) string {
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
writerelabelDebugFormInputs(qb422016, metric, relabelConfigs)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:85
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:87
|
||||
func streamrelabelDebugSteps(qw422016 *qt422016.Writer, dss []DebugStep, targetURL, targetID string) {
|
||||
//line lib/promrelabel/debug.qtpl:86
|
||||
//line lib/promrelabel/debug.qtpl:88
|
||||
if len(dss) > 0 {
|
||||
//line lib/promrelabel/debug.qtpl:86
|
||||
//line lib/promrelabel/debug.qtpl:88
|
||||
qw422016.N().S(`<div class="m-3"><b>Original labels:</b> <samp>`)
|
||||
//line lib/promrelabel/debug.qtpl:88
|
||||
//line lib/promrelabel/debug.qtpl:90
|
||||
streammustFormatLabels(qw422016, dss[0].In)
|
||||
//line lib/promrelabel/debug.qtpl:88
|
||||
//line lib/promrelabel/debug.qtpl:90
|
||||
qw422016.N().S(`</samp></div>`)
|
||||
//line lib/promrelabel/debug.qtpl:90
|
||||
//line lib/promrelabel/debug.qtpl:92
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:90
|
||||
//line lib/promrelabel/debug.qtpl:92
|
||||
qw422016.N().S(`<table class="table table-striped table-hover table-bordered table-sm"><thead><tr><th scope="col" style="width: 5%">Step</th><th scope="col" style="width: 25%">Relabeling Rule</th><th scope="col" style="width: 35%">Input Labels</th><th scope="col" stile="width: 35%">Output labels</a></tr></thead><tbody>`)
|
||||
//line lib/promrelabel/debug.qtpl:101
|
||||
for i, ds := range dss {
|
||||
//line lib/promrelabel/debug.qtpl:103
|
||||
for i, ds := range dss {
|
||||
//line lib/promrelabel/debug.qtpl:105
|
||||
inLabels, inErr := promutil.NewLabelsFromString(ds.In)
|
||||
outLabels, outErr := promutil.NewLabelsFromString(ds.Out)
|
||||
changedLabels := getChangedLabelNames(inLabels, outLabels)
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:106
|
||||
//line lib/promrelabel/debug.qtpl:108
|
||||
qw422016.N().S(`<tr><td>`)
|
||||
//line lib/promrelabel/debug.qtpl:108
|
||||
//line lib/promrelabel/debug.qtpl:110
|
||||
qw422016.N().D(i)
|
||||
//line lib/promrelabel/debug.qtpl:108
|
||||
//line lib/promrelabel/debug.qtpl:110
|
||||
qw422016.N().S(`</td><td><b><pre class="m-2">`)
|
||||
//line lib/promrelabel/debug.qtpl:109
|
||||
//line lib/promrelabel/debug.qtpl:111
|
||||
qw422016.E().S(ds.Rule)
|
||||
//line lib/promrelabel/debug.qtpl:109
|
||||
//line lib/promrelabel/debug.qtpl:111
|
||||
qw422016.N().S(`</pre></b></td><td>`)
|
||||
//line lib/promrelabel/debug.qtpl:111
|
||||
//line lib/promrelabel/debug.qtpl:113
|
||||
if inErr == nil {
|
||||
//line lib/promrelabel/debug.qtpl:111
|
||||
//line lib/promrelabel/debug.qtpl:113
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em" title="deleted and updated labels highlighted in red">`)
|
||||
//line lib/promrelabel/debug.qtpl:113
|
||||
//line lib/promrelabel/debug.qtpl:115
|
||||
streamlabelsWithHighlight(qw422016, inLabels, changedLabels, "#D15757")
|
||||
//line lib/promrelabel/debug.qtpl:113
|
||||
//line lib/promrelabel/debug.qtpl:115
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:115
|
||||
//line lib/promrelabel/debug.qtpl:117
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:115
|
||||
//line lib/promrelabel/debug.qtpl:117
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em; color: red" title="error parsing input labels"><pre>`)
|
||||
//line lib/promrelabel/debug.qtpl:117
|
||||
qw422016.E().S(inErr.Error())
|
||||
//line lib/promrelabel/debug.qtpl:117
|
||||
qw422016.N().S(`</pre></div>`)
|
||||
//line lib/promrelabel/debug.qtpl:119
|
||||
break
|
||||
//line lib/promrelabel/debug.qtpl:120
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:120
|
||||
qw422016.N().S(`</td><td>`)
|
||||
//line lib/promrelabel/debug.qtpl:123
|
||||
if outErr == nil {
|
||||
//line lib/promrelabel/debug.qtpl:123
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em" title="added and updated labels highlighted in blue">`)
|
||||
//line lib/promrelabel/debug.qtpl:125
|
||||
streamlabelsWithHighlight(qw422016, outLabels, changedLabels, "#4495e0")
|
||||
//line lib/promrelabel/debug.qtpl:125
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:127
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:127
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em; color: red" title="error parsing output labels"><pre>`)
|
||||
//line lib/promrelabel/debug.qtpl:129
|
||||
qw422016.E().S(outErr.Error())
|
||||
//line lib/promrelabel/debug.qtpl:129
|
||||
qw422016.E().S(inErr.Error())
|
||||
//line lib/promrelabel/debug.qtpl:119
|
||||
qw422016.N().S(`</pre></div>`)
|
||||
//line lib/promrelabel/debug.qtpl:131
|
||||
//line lib/promrelabel/debug.qtpl:121
|
||||
break
|
||||
//line lib/promrelabel/debug.qtpl:132
|
||||
//line lib/promrelabel/debug.qtpl:122
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:132
|
||||
qw422016.N().S(`</td></tr>`)
|
||||
//line lib/promrelabel/debug.qtpl:135
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:135
|
||||
qw422016.N().S(`</tbody></table>`)
|
||||
//line lib/promrelabel/debug.qtpl:138
|
||||
if len(dss) > 0 {
|
||||
//line lib/promrelabel/debug.qtpl:138
|
||||
qw422016.N().S(`<div class="m-3"><b>Resulting labels:</b> <samp>`)
|
||||
//line lib/promrelabel/debug.qtpl:140
|
||||
streammustFormatLabels(qw422016, dss[len(dss)-1].Out)
|
||||
//line lib/promrelabel/debug.qtpl:140
|
||||
qw422016.N().S(`</samp>`)
|
||||
//line lib/promrelabel/debug.qtpl:141
|
||||
if targetURL != "" {
|
||||
//line lib/promrelabel/debug.qtpl:141
|
||||
qw422016.N().S(`<div><b>Target URL:</b>`)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.N().S(`<a href="`)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.E().S(targetURL)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.N().S(`" target="_blank">`)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.E().S(targetURL)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.N().S(`</a>`)
|
||||
//line lib/promrelabel/debug.qtpl:144
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(`(<a href="target_response?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:146
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:146
|
||||
qw422016.N().S(`" target="_blank" title="click to fetch target response on behalf of the scraper">response</a>)`)
|
||||
//line lib/promrelabel/debug.qtpl:147
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:147
|
||||
//line lib/promrelabel/debug.qtpl:122
|
||||
qw422016.N().S(`</td><td>`)
|
||||
//line lib/promrelabel/debug.qtpl:125
|
||||
if outErr == nil {
|
||||
//line lib/promrelabel/debug.qtpl:125
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em" title="added and updated labels highlighted in blue">`)
|
||||
//line lib/promrelabel/debug.qtpl:127
|
||||
streamlabelsWithHighlight(qw422016, outLabels, changedLabels, "#4495e0")
|
||||
//line lib/promrelabel/debug.qtpl:127
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:149
|
||||
//line lib/promrelabel/debug.qtpl:129
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:129
|
||||
qw422016.N().S(`<div class="m-2" style="font-size: 0.9em; color: red" title="error parsing output labels"><pre>`)
|
||||
//line lib/promrelabel/debug.qtpl:131
|
||||
qw422016.E().S(outErr.Error())
|
||||
//line lib/promrelabel/debug.qtpl:131
|
||||
qw422016.N().S(`</pre></div>`)
|
||||
//line lib/promrelabel/debug.qtpl:133
|
||||
break
|
||||
//line lib/promrelabel/debug.qtpl:134
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:149
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:151
|
||||
//line lib/promrelabel/debug.qtpl:134
|
||||
qw422016.N().S(`</td></tr>`)
|
||||
//line lib/promrelabel/debug.qtpl:137
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:137
|
||||
qw422016.N().S(`</tbody></table>`)
|
||||
//line lib/promrelabel/debug.qtpl:140
|
||||
if len(dss) > 0 {
|
||||
//line lib/promrelabel/debug.qtpl:140
|
||||
qw422016.N().S(`<div class="m-3"><b>Resulting labels:</b> <samp>`)
|
||||
//line lib/promrelabel/debug.qtpl:142
|
||||
streammustFormatLabels(qw422016, dss[len(dss)-1].Out)
|
||||
//line lib/promrelabel/debug.qtpl:142
|
||||
qw422016.N().S(`</samp>`)
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
if targetURL != "" {
|
||||
//line lib/promrelabel/debug.qtpl:143
|
||||
qw422016.N().S(`<div><b>Target URL:</b>`)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(`<a href="`)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.E().S(targetURL)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(`" target="_blank">`)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.E().S(targetURL)
|
||||
//line lib/promrelabel/debug.qtpl:145
|
||||
qw422016.N().S(`</a>`)
|
||||
//line lib/promrelabel/debug.qtpl:146
|
||||
if targetID != "" {
|
||||
//line lib/promrelabel/debug.qtpl:147
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:147
|
||||
qw422016.N().S(`(<a href="target_response?id=`)
|
||||
//line lib/promrelabel/debug.qtpl:148
|
||||
qw422016.E().S(targetID)
|
||||
//line lib/promrelabel/debug.qtpl:148
|
||||
qw422016.N().S(`" target="_blank" title="click to fetch target response on behalf of the scraper">response</a>)`)
|
||||
//line lib/promrelabel/debug.qtpl:149
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:149
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:151
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:151
|
||||
qw422016.N().S(`</div>`)
|
||||
//line lib/promrelabel/debug.qtpl:153
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
func writerelabelDebugSteps(qq422016 qtio422016.Writer, dss []DebugStep, targetURL, targetID string) {
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
streamrelabelDebugSteps(qw422016, dss, targetURL, targetID)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
func relabelDebugSteps(dss []DebugStep, targetURL, targetID string) string {
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
writerelabelDebugSteps(qb422016, dss, targetURL, targetID)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:152
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
//line lib/promrelabel/debug.qtpl:156
|
||||
func StreamRelabelDebugStepsJSON(qw422016 *qt422016.Writer, targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) {
|
||||
//line lib/promrelabel/debug.qtpl:154
|
||||
//line lib/promrelabel/debug.qtpl:156
|
||||
qw422016.N().S(`{`)
|
||||
//line lib/promrelabel/debug.qtpl:156
|
||||
if err != nil {
|
||||
//line lib/promrelabel/debug.qtpl:156
|
||||
qw422016.N().S(`"status": "error","error":`)
|
||||
//line lib/promrelabel/debug.qtpl:158
|
||||
qw422016.N().Q(fmt.Sprintf("Error: %s", err))
|
||||
//line lib/promrelabel/debug.qtpl:159
|
||||
} else {
|
||||
if err != nil {
|
||||
//line lib/promrelabel/debug.qtpl:158
|
||||
qw422016.N().S(`"status": "error","error":`)
|
||||
//line lib/promrelabel/debug.qtpl:160
|
||||
qw422016.N().Q(fmt.Sprintf("Error: %s", err))
|
||||
//line lib/promrelabel/debug.qtpl:161
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:162
|
||||
var hasError bool
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:160
|
||||
//line lib/promrelabel/debug.qtpl:162
|
||||
qw422016.N().S(`"status": "success","steps": [`)
|
||||
//line lib/promrelabel/debug.qtpl:163
|
||||
for i, ds := range dss {
|
||||
//line lib/promrelabel/debug.qtpl:165
|
||||
for i, ds := range dss {
|
||||
//line lib/promrelabel/debug.qtpl:167
|
||||
inLabels, inErr := promutil.NewLabelsFromString(ds.In)
|
||||
outLabels, outErr := promutil.NewLabelsFromString(ds.Out)
|
||||
changedLabels := getChangedLabelNames(inLabels, outLabels)
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:168
|
||||
//line lib/promrelabel/debug.qtpl:170
|
||||
qw422016.N().S(`{"inLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:170
|
||||
//line lib/promrelabel/debug.qtpl:172
|
||||
qw422016.N().Q(labelsWithHighlight(inLabels, changedLabels, "#D15757"))
|
||||
//line lib/promrelabel/debug.qtpl:170
|
||||
//line lib/promrelabel/debug.qtpl:172
|
||||
qw422016.N().S(`,"outLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:171
|
||||
//line lib/promrelabel/debug.qtpl:173
|
||||
qw422016.N().Q(labelsWithHighlight(outLabels, changedLabels, "#4495e0"))
|
||||
//line lib/promrelabel/debug.qtpl:171
|
||||
//line lib/promrelabel/debug.qtpl:173
|
||||
qw422016.N().S(`,"rule":`)
|
||||
//line lib/promrelabel/debug.qtpl:172
|
||||
//line lib/promrelabel/debug.qtpl:174
|
||||
qw422016.N().Q(ds.Rule)
|
||||
//line lib/promrelabel/debug.qtpl:172
|
||||
//line lib/promrelabel/debug.qtpl:174
|
||||
qw422016.N().S(`,"errors": {`)
|
||||
//line lib/promrelabel/debug.qtpl:174
|
||||
if inErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:174
|
||||
qw422016.N().S(`"inLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:175
|
||||
qw422016.N().Q(`<span style="color: #D15757">` + inErr.Error() + `</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:175
|
||||
if outErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:175
|
||||
qw422016.N().S(`,`)
|
||||
//line lib/promrelabel/debug.qtpl:175
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:176
|
||||
hasError = true
|
||||
|
||||
if inErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:176
|
||||
qw422016.N().S(`"inLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:177
|
||||
} else {
|
||||
qw422016.N().Q(`<span style="color: #D15757">` + inErr.Error() + `</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:177
|
||||
if outErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:177
|
||||
qw422016.N().S(`,`)
|
||||
//line lib/promrelabel/debug.qtpl:177
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:178
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:179
|
||||
if outErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:179
|
||||
qw422016.N().S(`"outLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:180
|
||||
qw422016.N().Q(`<span style="color: #D15757">` + outErr.Error() + `</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:181
|
||||
hasError = true
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:182
|
||||
//line lib/promrelabel/debug.qtpl:179
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:180
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:181
|
||||
if outErr != nil {
|
||||
//line lib/promrelabel/debug.qtpl:181
|
||||
qw422016.N().S(`"outLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:182
|
||||
qw422016.N().Q(`<span style="color: #D15757">` + outErr.Error() + `</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:183
|
||||
hasError = true
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:184
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:184
|
||||
qw422016.N().S(`}}`)
|
||||
//line lib/promrelabel/debug.qtpl:185
|
||||
//line lib/promrelabel/debug.qtpl:187
|
||||
if i != len(dss)-1 {
|
||||
//line lib/promrelabel/debug.qtpl:185
|
||||
//line lib/promrelabel/debug.qtpl:187
|
||||
qw422016.N().S(`,`)
|
||||
//line lib/promrelabel/debug.qtpl:185
|
||||
//line lib/promrelabel/debug.qtpl:187
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:186
|
||||
//line lib/promrelabel/debug.qtpl:188
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:186
|
||||
//line lib/promrelabel/debug.qtpl:188
|
||||
qw422016.N().S(`]`)
|
||||
//line lib/promrelabel/debug.qtpl:188
|
||||
//line lib/promrelabel/debug.qtpl:190
|
||||
if len(dss) > 0 && !hasError {
|
||||
//line lib/promrelabel/debug.qtpl:188
|
||||
//line lib/promrelabel/debug.qtpl:190
|
||||
qw422016.N().S(`,"originalLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:190
|
||||
qw422016.N().Q(mustFormatLabels(dss[0].In))
|
||||
//line lib/promrelabel/debug.qtpl:190
|
||||
qw422016.N().S(`,"resultingLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:191
|
||||
qw422016.N().Q(mustFormatLabels(dss[len(dss)-1].Out))
|
||||
//line lib/promrelabel/debug.qtpl:192
|
||||
qw422016.N().Q(mustFormatLabels(dss[0].In))
|
||||
//line lib/promrelabel/debug.qtpl:192
|
||||
qw422016.N().S(`,"resultingLabels":`)
|
||||
//line lib/promrelabel/debug.qtpl:193
|
||||
qw422016.N().Q(mustFormatLabels(dss[len(dss)-1].Out))
|
||||
//line lib/promrelabel/debug.qtpl:194
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:193
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:193
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qw422016.N().S(`}`)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
func WriteRelabelDebugStepsJSON(qq422016 qtio422016.Writer, targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) {
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
StreamRelabelDebugStepsJSON(qw422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
func RelabelDebugStepsJSON(targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) string {
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
WriteRelabelDebugStepsJSON(qb422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:195
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
func streamlabelsWithHighlight(qw422016 *qt422016.Writer, labels *promutil.Labels, highlight map[string]struct{}, color string) {
|
||||
func WriteRelabelDebugStepsJSON(qq422016 qtio422016.Writer, targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) {
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
StreamRelabelDebugStepsJSON(qw422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
func RelabelDebugStepsJSON(targetURL, targetID string, dss []DebugStep, metric, relabelConfigs string, err error) string {
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
WriteRelabelDebugStepsJSON(qb422016, targetURL, targetID, dss, metric, relabelConfigs, err)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:197
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:199
|
||||
func streamlabelsWithHighlight(qw422016 *qt422016.Writer, labels *promutil.Labels, highlight map[string]struct{}, color string) {
|
||||
//line lib/promrelabel/debug.qtpl:201
|
||||
labelsList := labels.GetLabels()
|
||||
metricName := ""
|
||||
for i, label := range labelsList {
|
||||
@@ -509,153 +501,153 @@ func streamlabelsWithHighlight(qw422016 *qt422016.Writer, labels *promutil.Label
|
||||
}
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:209
|
||||
//line lib/promrelabel/debug.qtpl:211
|
||||
if metricName != "" {
|
||||
//line lib/promrelabel/debug.qtpl:210
|
||||
if _, ok := highlight["__name__"]; ok {
|
||||
//line lib/promrelabel/debug.qtpl:210
|
||||
qw422016.N().S(`<span style="font-weight:bold;color:`)
|
||||
//line lib/promrelabel/debug.qtpl:211
|
||||
qw422016.E().S(color)
|
||||
//line lib/promrelabel/debug.qtpl:211
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:211
|
||||
qw422016.E().S(metricName)
|
||||
//line lib/promrelabel/debug.qtpl:211
|
||||
qw422016.N().S(`</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:212
|
||||
} else {
|
||||
if _, ok := highlight["__name__"]; ok {
|
||||
//line lib/promrelabel/debug.qtpl:212
|
||||
qw422016.N().S(`<span style="font-weight:bold;color:`)
|
||||
//line lib/promrelabel/debug.qtpl:213
|
||||
qw422016.E().S(color)
|
||||
//line lib/promrelabel/debug.qtpl:213
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:213
|
||||
qw422016.E().S(metricName)
|
||||
//line lib/promrelabel/debug.qtpl:214
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:215
|
||||
if len(labelsList) == 0 {
|
||||
//line lib/promrelabel/debug.qtpl:215
|
||||
return
|
||||
//line lib/promrelabel/debug.qtpl:215
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:216
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:216
|
||||
qw422016.N().S(`{`)
|
||||
//line lib/promrelabel/debug.qtpl:218
|
||||
for i, label := range labelsList {
|
||||
//line lib/promrelabel/debug.qtpl:219
|
||||
if _, ok := highlight[label.Name]; ok {
|
||||
//line lib/promrelabel/debug.qtpl:219
|
||||
qw422016.N().S(`<span style="font-weight:bold;color:`)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
qw422016.E().S(color)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
qw422016.E().S(label.Name)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
qw422016.N().S(`=`)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
qw422016.E().Q(label.Value)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
//line lib/promrelabel/debug.qtpl:213
|
||||
qw422016.N().S(`</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:221
|
||||
//line lib/promrelabel/debug.qtpl:214
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:215
|
||||
qw422016.E().S(metricName)
|
||||
//line lib/promrelabel/debug.qtpl:216
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:217
|
||||
if len(labelsList) == 0 {
|
||||
//line lib/promrelabel/debug.qtpl:217
|
||||
return
|
||||
//line lib/promrelabel/debug.qtpl:217
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:218
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:218
|
||||
qw422016.N().S(`{`)
|
||||
//line lib/promrelabel/debug.qtpl:220
|
||||
for i, label := range labelsList {
|
||||
//line lib/promrelabel/debug.qtpl:221
|
||||
if _, ok := highlight[label.Name]; ok {
|
||||
//line lib/promrelabel/debug.qtpl:221
|
||||
qw422016.N().S(`<span style="font-weight:bold;color:`)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.E().S(color)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.E().S(label.Name)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.N().S(`=`)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.E().Q(label.Value)
|
||||
//line lib/promrelabel/debug.qtpl:222
|
||||
qw422016.N().S(`</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:223
|
||||
}
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
qw422016.E().S(label.Name)
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
qw422016.N().S(`=`)
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
qw422016.E().Q(label.Value)
|
||||
//line lib/promrelabel/debug.qtpl:225
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:226
|
||||
if i < len(labelsList)-1 {
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
//line lib/promrelabel/debug.qtpl:226
|
||||
qw422016.N().S(`,`)
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
//line lib/promrelabel/debug.qtpl:226
|
||||
qw422016.N().S(` `)
|
||||
//line lib/promrelabel/debug.qtpl:224
|
||||
//line lib/promrelabel/debug.qtpl:226
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:225
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:225
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qw422016.N().S(`}`)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
func writelabelsWithHighlight(qq422016 qtio422016.Writer, labels *promutil.Labels, highlight map[string]struct{}, color string) {
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
streamlabelsWithHighlight(qw422016, labels, highlight, color)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
func labelsWithHighlight(labels *promutil.Labels, highlight map[string]struct{}, color string) string {
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
writelabelsWithHighlight(qb422016, labels, highlight, color)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:227
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
func writelabelsWithHighlight(qq422016 qtio422016.Writer, labels *promutil.Labels, highlight map[string]struct{}, color string) {
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
streamlabelsWithHighlight(qw422016, labels, highlight, color)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
func labelsWithHighlight(labels *promutil.Labels, highlight map[string]struct{}, color string) string {
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
writelabelsWithHighlight(qb422016, labels, highlight, color)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:229
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:231
|
||||
func streammustFormatLabels(qw422016 *qt422016.Writer, s string) {
|
||||
//line lib/promrelabel/debug.qtpl:230
|
||||
//line lib/promrelabel/debug.qtpl:232
|
||||
labels, err := promutil.NewLabelsFromString(s)
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:231
|
||||
if err != nil {
|
||||
//line lib/promrelabel/debug.qtpl:231
|
||||
qw422016.N().S(`<span style="color: red" title="error parsing labels:`)
|
||||
//line lib/promrelabel/debug.qtpl:232
|
||||
qw422016.E().S(err.Error())
|
||||
//line lib/promrelabel/debug.qtpl:232
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:232
|
||||
qw422016.E().S("error parsing labels: " + err.Error())
|
||||
//line lib/promrelabel/debug.qtpl:232
|
||||
qw422016.N().S(`</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:233
|
||||
} else {
|
||||
if err != nil {
|
||||
//line lib/promrelabel/debug.qtpl:233
|
||||
qw422016.N().S(`<span style="color: red" title="error parsing labels:`)
|
||||
//line lib/promrelabel/debug.qtpl:234
|
||||
streamlabelsWithHighlight(qw422016, labels, nil, "")
|
||||
qw422016.E().S(err.Error())
|
||||
//line lib/promrelabel/debug.qtpl:234
|
||||
qw422016.N().S(`">`)
|
||||
//line lib/promrelabel/debug.qtpl:234
|
||||
qw422016.E().S("error parsing labels: " + err.Error())
|
||||
//line lib/promrelabel/debug.qtpl:234
|
||||
qw422016.N().S(`</span>`)
|
||||
//line lib/promrelabel/debug.qtpl:235
|
||||
} else {
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
streamlabelsWithHighlight(qw422016, labels, nil, "")
|
||||
//line lib/promrelabel/debug.qtpl:237
|
||||
}
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
func writemustFormatLabels(qq422016 qtio422016.Writer, s string) {
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
streammustFormatLabels(qw422016, s)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
}
|
||||
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
func mustFormatLabels(s string) string {
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
writemustFormatLabels(qb422016, s)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
qs422016 := string(qb422016.B)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
return qs422016
|
||||
//line lib/promrelabel/debug.qtpl:236
|
||||
//line lib/promrelabel/debug.qtpl:238
|
||||
}
|
||||
|
||||
@@ -50,6 +50,17 @@ func (ie *IfExpression) Parse(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseFromMetricExpr parses if from given MetricExpr
|
||||
func (ie *IfExpression) ParseFromMetricExpr(me *metricsql.MetricExpr) error {
|
||||
var ieLocal ifExpression
|
||||
if err := ieLocal.parseFromMetricExpr(me); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ie.ies = []*ifExpression{&ieLocal}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON unmarshals ie from JSON data.
|
||||
func (ie *IfExpression) UnmarshalJSON(data []byte) error {
|
||||
var v any
|
||||
@@ -182,6 +193,16 @@ func (ie *ifExpression) Parse(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ie *ifExpression) parseFromMetricExpr(me *metricsql.MetricExpr) error {
|
||||
lfss, err := metricExprToLabelFilterss(me)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse series selector: %w", err)
|
||||
}
|
||||
ie.s = string(me.AppendString(nil))
|
||||
ie.lfss = lfss
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON unmarshals ie from JSON data.
|
||||
func (ie *ifExpression) UnmarshalJSON(data []byte) error {
|
||||
var s string
|
||||
|
||||
@@ -76,6 +76,9 @@ var (
|
||||
"Every %d occurrence in the template is substituted with -promscrape.cluster.memberNum at urls to vmagent instances responsible for scraping the given target "+
|
||||
"at /service-discovery page. For example -promscrape.cluster.memberURLTemplate='http://vmagent-%d:8429/targets'. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more details")
|
||||
clusterShardByLabels = flagutil.NewArrayString("promscrape.cluster.shardByLabels", "Optional list of target labels, which will be used for sharding targets among cluster members "+
|
||||
"if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info")
|
||||
clusterReplicationFactor = flag.Int("promscrape.cluster.replicationFactor", 1, "The number of members in the cluster, which scrape the same targets. "+
|
||||
"If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info")
|
||||
@@ -86,7 +89,10 @@ var (
|
||||
"Bigger uncompressed responses are rejected. See also max_scrape_size option at https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs")
|
||||
)
|
||||
|
||||
var clusterMemberID int
|
||||
var (
|
||||
clusterMemberID int
|
||||
clusterShardByLabelsSorted []string
|
||||
)
|
||||
|
||||
func mustInitClusterMemberID() {
|
||||
s := *clusterMemberNum
|
||||
@@ -110,6 +116,15 @@ func mustInitClusterMemberID() {
|
||||
clusterMemberID = n
|
||||
}
|
||||
|
||||
func initClusterShardByLabels() {
|
||||
if len(*clusterShardByLabels) == 0 {
|
||||
clusterShardByLabelsSorted = nil
|
||||
return
|
||||
}
|
||||
clusterShardByLabelsSorted = slices.Clone(*clusterShardByLabels)
|
||||
slices.Sort(clusterShardByLabelsSorted)
|
||||
}
|
||||
|
||||
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
type Config struct {
|
||||
Global GlobalConfig `yaml:"global,omitempty"`
|
||||
@@ -1138,12 +1153,28 @@ func (stc *StaticConfig) appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConf
|
||||
}
|
||||
|
||||
func appendScrapeWorkKey(dst []byte, labels *promutil.Labels) []byte {
|
||||
for _, label := range labels.GetLabels() {
|
||||
// Do not use strconv.AppendQuote, since it is slow according to CPU profile.
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
originalDstLen := len(dst)
|
||||
for _, targetLabelName := range clusterShardByLabelsSorted {
|
||||
for _, label := range labels.GetLabels() {
|
||||
if label.Name == targetLabelName {
|
||||
// Do not use strconv.AppendQuote, since it is slow according to CPU profile.
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Use all labels to compute the key if `promscrape.cluster.shardByLabels` is not configured
|
||||
if len(dst) == originalDstLen {
|
||||
for _, label := range labels.GetLabels() {
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
}
|
||||
return dst
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user